author     rdivacky <rdivacky@FreeBSD.org>    2009-12-01 11:07:05 +0000
committer  rdivacky <rdivacky@FreeBSD.org>    2009-12-01 11:07:05 +0000
commit     e7908924d847e63b02bc82bfaa1709ab9c774dcd (patch)
tree       ffe0478472eaa0686f11cb02c6df7d257b8719b0 /lib/Target
parent     bf68f1ea49e39c4194f339ddd4421b0c3a31988b (diff)
Update LLVM to r90226.
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 58
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 21
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 88
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 42
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 33
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 393
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 115
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 67
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 2046
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 7
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 57
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 6
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 51
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMScheduleV7.td | 26
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 13
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 25
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 8
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 4
-rw-r--r--  lib/Target/Alpha/AlphaJITInfo.cpp | 14
-rw-r--r--  lib/Target/Alpha/AlphaJITInfo.h | 1
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 8
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 10
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 31
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 25
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 20
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 64
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 7
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCFrameInfo.h | 20
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 26
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 9
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 26
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 57
-rw-r--r--  lib/Target/README.txt | 162
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 6
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 4
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 17
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.h | 1
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 6
-rw-r--r--  lib/Target/X86/Disassembler/Makefile | 16
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 29
-rw-r--r--  lib/Target/X86/Makefile | 2
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 1
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 49
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 1
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 11
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 16
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 66
-rw-r--r--  lib/Target/X86/X86JITInfo.h | 12
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 15
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 5
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 6
68 files changed, 2239 insertions(+), 1653 deletions(-)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b50b609..c95d4c8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -402,6 +402,21 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
return Found;
}
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
@@ -647,11 +662,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
SrcRC == ARM::DPR_VFP2RegisterClass ||
SrcRC == ARM::DPR_8RegisterClass) {
// Always use neon reg-reg move if source or dest is NEON-only regclass.
- BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon),
+ DestReg).addReg(SrcReg));
} else if (DestRC == ARM::QPRRegisterClass ||
DestRC == ARM::QPR_VFP2RegisterClass ||
DestRC == ARM::QPR_8RegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ),
+ DestReg).addReg(SrcReg));
} else {
return false;
}
@@ -695,13 +712,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// FIXME: Neon instructions should support predicates
if (Align >= 16
&& (getRegisterInfo().needsStackRealignment(MF))) {
- BuildMI(MBB, I, DL, get(ARM::VST1q64))
- .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO)
- .addReg(SrcReg, getKillRegState(isKill));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+ .addMemOperand(MMO)
+ .addReg(SrcReg, getKillRegState(isKill)));
} else {
- BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
- addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
}
@@ -740,11 +758,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// FIXME: Neon instructions should support predicates
if (Align >= 16
&& (getRegisterInfo().needsStackRealignment(MF))) {
- BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
- .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+ .addMemOperand(MMO));
} else {
- BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
- addMemOperand(MMO);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
}
@@ -978,7 +997,10 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
int Opcode = MI0->getOpcode();
- if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) {
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
@@ -1005,16 +1027,6 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
}
-unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB,
- unsigned DefaultLimit) const {
- // If the target processor can predict indirect branches, it is highly
- // desirable to duplicate them, since it can often make them predictable.
- if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) &&
- getSubtarget().hasBranchTargetBuffer())
- return DefaultLimit + 2;
- return DefaultLimit;
-}
-
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
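For context: the hunks above route NEON register moves, spills, and reloads through AddDefaultPred, which appends the standard ARM predicate operand pair (a condition code plus an optional CPSR register) to a MachineInstrBuilder. A minimal sketch of that helper, along the lines of the static inline definitions in ARMBaseInstrInfo.h:

    // Sketch only. ARMCC::AL ("always") encodes as 14; addReg(0) supplies a
    // null CPSR operand, i.e. the instruction does not depend on the flags.
    static inline const MachineInstrBuilder &
    AddDefaultPred(const MachineInstrBuilder &MIB) {
      return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
    }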
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 73e854f..282e30c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -162,6 +162,22 @@ namespace ARMII {
I_BitShift = 25,
CondShift = 28
};
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16
+ };
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
@@ -220,6 +236,8 @@ public:
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
+ virtual bool isPredicable(MachineInstr *MI) const;
+
/// GetInstSize - Returns the size of the specified MachineInstr.
///
virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
@@ -272,9 +290,6 @@ public:
virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
const MachineRegisterInfo *MRI) const;
-
- virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB,
- unsigned DefaultLimit) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 19762ee..653328d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -799,6 +799,54 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return ARM::SP;
}
+int
+ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ FrameReg = ARM::SP;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (needsStackRealignment(MF)) {
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack pointer for locals.
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+ } else if (hasFP(MF) && AFI->hasStackFrame()) {
+ if (isFixed || MFI->hasVarSizedObjects()) {
+ // Use frame pointer to reference fixed objects unless this is a
+ // frameless function.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else if (AFI->isThumb2Function()) {
+ // In Thumb2 mode, the negative offset is very limited.
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = getFrameRegister(MF);
+ Offset = FPOffset;
+ }
+ }
+ }
+ return Offset;
+}
+
+
+int
+ARMBaseRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
return 0;
@@ -1115,45 +1163,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
- unsigned FrameReg = ARM::SP;
int FrameIndex = MI.getOperand(i).getIndex();
int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
- bool isFixed = MFI->isFixedObjectIndex(FrameIndex);
+ unsigned FrameReg;
- // When doing dynamic stack realignment, all of these need to change(?)
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (needsStackRealignment(MF)) {
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack pointer for locals.
- assert (hasFP(MF) && "dynamic stack realignment without a FP!");
- if (isFixed) {
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- // When referencing from the frame pointer, stack pointer adjustments
- // don't matter.
- SPAdj = 0;
- }
- } else if (hasFP(MF) && AFI->hasStackFrame()) {
- assert(SPAdj == 0 && "Unexpected stack offset!");
- if (isFixed || MFI->hasVarSizedObjects()) {
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- } else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited.
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
- if (FPOffset >= -255 && FPOffset < 0) {
- FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
- }
- }
- }
+ Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
+ if (FrameReg != ARM::SP)
+ SPAdj = 0;
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
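For context: the new getFrameIndexReference centralizes the FP-versus-SP decision that eliminateFrameIndex previously made inline, so getFrameIndexOffset and the eliminateFrameIndex hunk above share one implementation. A hedged usage sketch, assuming ARI points at this register-info object:

    // Resolve frame index FI to a base register plus an offset from it.
    unsigned FrameReg;
    int Offset = ARI->getFrameIndexReference(MF, FI, FrameReg);
    // FrameReg comes back as ARM::SP unless the frame pointer must be used:
    // fixed objects under dynamic realignment, variable-sized frames, or the
    // limited-negative-offset case on Thumb2.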
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 4b267b0..2788d07 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -106,6 +106,9 @@ public:
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
+ int getFrameIndexReference(MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int getFrameIndexOffset(MachineFunction &MF, int FI) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 766acff..17e7d44 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -613,7 +613,6 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case ARM::DWARF_LOC:
// Do nothing.
break;
case ARM::CONSTPOOL_ENTRY:
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index d22c43a..e59a315 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -162,6 +162,9 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
const TargetInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
@@ -236,10 +239,19 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
if (!MBB->empty() &&
MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
unsigned MBBId = MBB->getNumber();
- assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
(BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
}
}
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ }
#endif
}
@@ -269,6 +281,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
+ HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
@@ -452,6 +465,19 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
/// and finding all of the constant pool users.
void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures
unsigned Offset = 0;
for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
MBBI != E; ++MBBI) {
@@ -481,7 +507,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
AFI->setAlign(2U);
- if ((Offset+MBBSize)%4 != 0)
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
MBBSize += 2; // padding
continue; // Does not get an entry in ImmBranches
@@ -550,7 +576,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
case ARM::LEApcrel:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
- // 4 byte wide, this is always correct. We'llc heck for other
+ // 4 byte wide, this is always correct. We'll check for other
// displacements that fits in a SoImm as well.
Bits = 8;
Scale = 4;
@@ -609,7 +635,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (isThumb &&
!MBB.empty() &&
MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4) != 0)
+ ((Offset%4) != 0 || HasInlineAsm))
MBBSize += 2;
BBSizes.push_back(MBBSize);
@@ -633,7 +659,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
// alignment padding, and compensate if so.
if (isThumb &&
MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- Offset%4 != 0)
+ (Offset%4 != 0 || HasInlineAsm))
Offset += 2;
// Sum instructions before MI in MBB.
@@ -829,7 +855,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert(CPEOffset%4 == 0 && "Misaligned CPE");
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
if (DoDump) {
DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
@@ -870,7 +896,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
if (!isThumb)
continue;
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty()) {
+ if (!MBB->empty() && !HasInlineAsm) {
// Constant pool entries require padding.
if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
unsigned OldOffset = BBOffsets[i] - delta;
@@ -1226,7 +1252,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
// Compensate for .align 2 in thumb mode.
- if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
Size += 2;
// Increase the size of the island block to account for the new entry.
BBSizes[NewIsland->getNumber()] += Size;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4d0f899..c929c54 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -75,17 +75,30 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
case ARM::t2MOVi32imm: {
unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Imm = MI.getOperand(1).getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
if (!MI.getOperand(0).isDead()) {
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::t2MOVi16), DstReg)
- .addImm(Lo16));
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true))
- .addReg(DstReg).addImm(Hi16));
+ const MachineOperand &MO = MI.getOperand(1);
+ MachineInstrBuilder LO16, HI16;
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ // FIXME: What's about memoperands?
+ }
+ AddDefaultPred(LO16);
+ AddDefaultPred(HI16);
}
MI.eraseFromParent();
Modified = true;
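For context: the immediate path of this expansion is plain bit arithmetic. A worked example, with the value chosen purely for illustration:

    unsigned Imm  = 0xDEADBEEF;           // t2MOVi32imm operand
    unsigned Lo16 = Imm & 0xffff;         // 0xBEEF -> t2MOVi16  dst, #0xBEEF
    unsigned Hi16 = (Imm >> 16) & 0xffff; // 0xDEAD -> t2MOVTi16 dst, #0xDEAD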
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 696a8e1..d63f3e6 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -149,6 +149,21 @@ private:
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDValue Op);
+ SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -246,7 +261,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- } else if (N.getOpcode() == ARMISD::Wrapper) {
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -448,7 +465,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- } else if (N.getOpcode() == ARMISD::Wrapper) {
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@@ -543,7 +562,13 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
}
if (N.getOpcode() != ISD::ADD) {
- Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+
Offset = CurDAG->getRegister(0, MVT::i32);
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
@@ -666,7 +691,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
- } else if (N.getOpcode() == ARMISD::Wrapper) {
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
@@ -1034,12 +1061,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
case MVT::v4i32: OpcodeIndex = 2; break;
}
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
+ Pred, PredReg, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1047,10 +1077,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
+ Pred, PredReg, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@@ -1071,15 +1102,16 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,
+ Pred, PredReg, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
- Align, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
+ Align, Pred, PredReg, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@@ -1123,6 +1155,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
case MVT::v4i32: OpcodeIndex = 2; break;
}
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+
SmallVector<SDValue, 8> Ops;
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
@@ -1133,8 +1168,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
unsigned Opc = DOpcodes[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1148,8 +1185,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3)));
}
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1162,10 +1201,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
N->getOperand(Vec+3)));
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+5);
+ MVT::Other, Ops.data(), NumVecs+7);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
@@ -1173,10 +1214,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+4] = Chain;
+ Ops[NumVecs+4] = Pred;
+ Ops[NumVecs+5] = PredReg;
+ Ops[NumVecs+6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+5);
+ MVT::Other, Ops.data(), NumVecs+7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1224,6 +1267,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
case MVT::v4i32: OpcodeIndex = 1; break;
}
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+
SmallVector<SDValue, 9> Ops;
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
@@ -1249,15 +1295,17 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
N->getOperand(Vec+3)));
}
Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5);
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+7);
// For a 64-bit vector load to D registers, nothing more needs to be done.
if (is64BitVector)
return VLdLn;
@@ -1282,7 +1330,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
return NULL;
unsigned Shl_imm = 0;
- if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){
+ if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
unsigned Srl_imm = 0;
if (isInt32Immediate(Op.getOperand(1), Srl_imm)) {
@@ -1302,6 +1350,173 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
return NULL;
}
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::t2MOVCCi, MVT::i32, Ops, 5);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ if (Predicate_so_imm(TrueVal.getNode())) {
+ SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) {
+ EVT VT = Op.getValueType();
+ SDValue FalseVal = Op.getOperand(0);
+ SDValue TrueVal = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ SDValue CCR = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+}
+
SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
@@ -1337,7 +1552,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
- SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32);
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
@@ -1549,122 +1764,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
- case ARMISD::CMOV: {
- EVT VT = Op.getValueType();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
- SDValue N3 = Op.getOperand(3);
- SDValue InFlag = Op.getOperand(4);
- assert(N2.getOpcode() == ISD::Constant);
- assert(N3.getOpcode() == ISD::Register);
-
- if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
- // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (Subtarget->isThumb()) {
- if (SelectT2ShifterOperandReg(Op, N1, CPTmp0, CPTmp1)) {
- unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
- unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
- unsigned Opc = 0;
- switch (SOShOp) {
- case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
- case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
- case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
- case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
- default:
- llvm_unreachable("Unknown so_reg opcode!");
- break;
- }
- SDValue SOShImm =
- CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, CPTmp0, SOShImm, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6);
- }
- } else {
- if (SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
- ARM::MOVCCs, MVT::i32, Ops, 7);
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
- // (imm:i32):$cc)
- // Emits: (MOVCCi:i32 GPR:i32:$false,
- // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
- // Pattern complexity = 10 cost = 1 size = 0
- if (N3.getOpcode() == ISD::Constant) {
- if (Subtarget->isThumb()) {
- if (Predicate_t2_so_imm(N3.getNode())) {
- SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N1)->getZExtValue()),
- MVT::i32);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
- ARM::t2MOVCCi, MVT::i32, Ops, 5);
- }
- } else {
- if (Predicate_so_imm(N3.getNode())) {
- SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N1)->getZExtValue()),
- MVT::i32);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
- ARM::MOVCCi, MVT::i32, Ops, 5);
- }
- }
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 1 size = 0
- //
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 11 size = 0
- //
- // Also FCPYScc and FCPYDcc.
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
- unsigned Opc = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
- case MVT::i32:
- Opc = Subtarget->isThumb()
- ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
- : ARM::MOVCCr;
- break;
- case MVT::f32:
- Opc = ARM::VMOVScc;
- break;
- case MVT::f64:
- Opc = ARM::VMOVDcc;
- break;
- }
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
- }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(Op);
case ARMISD::CNEG: {
EVT VT = Op.getValueType();
SDValue N0 = Op.getOperand(0);
@@ -1707,8 +1808,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- return CurDAG->getMachineNode(Opc, dl, VT, VT,
- N->getOperand(0), N->getOperand(1));
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ARMISD::VUZP: {
unsigned Opc = 0;
@@ -1724,8 +1827,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- return CurDAG->getMachineNode(Opc, dl, VT, VT,
- N->getOperand(0), N->getOperand(1));
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ARMISD::VTRN: {
unsigned Opc = 0;
@@ -1741,8 +1846,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
- return CurDAG->getMachineNode(Opc, dl, VT, VT,
- N->getOperand(0), N->getOperand(1));
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ISD::INTRINSIC_VOID:
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c3af8e6..c839fc6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <sstream>
@@ -355,10 +356,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
@@ -1360,10 +1357,17 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
PseudoSourceValue::getGOT(), 0);
return Result;
} else {
- SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
+ }
}
}
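For context: with useMovt() the ELF path now wraps a TargetGlobalAddress directly, and the ARMExpandPseudoInsts hunk earlier attaches the MO_LO16/MO_HI16 target flags, so the address is built inline rather than loaded from a constant pool. Schematically, for a hypothetical symbol foo:

    // Constant-pool form (DontUseMovt):
    //     ldr  r0, .LCPI0_0        ; .LCPI0_0: .long foo
    // movw/movt form (UseMovt):
    //     movw r0, :lower16:foo
    //     movt r0, :upper16:foo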
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 83b5cb4..e76e93c 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1217,27 +1217,45 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
//
class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
: InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = iops;
- let AsmString = asm;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(
+ !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
+ !strconcat("\t", asm));
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
-class NI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, "", pattern> {
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
}
-class NI4<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : NeonI<oops, iops, AddrMode4, IndexModeNone, itin, asm, "", pattern> {
+class NI<dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, "",
+ pattern> {
+}
+
+class NI4<dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : NeonXI<oops, iops, AddrMode4, IndexModeNone, itin, opc, asm, "",
+ pattern> {
}
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr,
+ pattern> {
let Inst{31-24} = 0b11110100;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1246,8 +1264,16 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
}
class NDataI<dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+}
+
+class NDataXI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm,
+ cstr, pattern> {
let Inst{31-25} = 0b1111001;
}
@@ -1255,8 +1281,8 @@ class NDataI<dag oops, dag iops, InstrItinClass itin,
class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
bit op5, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{23} = op23;
let Inst{21-19} = op21_19;
let Inst{11-8} = op11_8;
@@ -1270,8 +1296,8 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1281,14 +1307,16 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{4} = op4;
}
-// NEON Vector Duplicate (scalar).
-// Inst{19-16} is specified by subclasses.
-class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
let Inst{11-7} = op11_7;
let Inst{6} = op6;
let Inst{4} = op4;
@@ -1297,8 +1325,8 @@ class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4,
// NEON 2 vector register with immediate.
class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{11-8} = op11_8;
@@ -1310,8 +1338,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// NEON 3 vector register format.
class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1320,16 +1348,15 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{4} = op4;
}
-// NEON 3 vector register with immediate. This is only used for VEXT where
-// op11_8 represents the starting byte index of the extracted result in the
-// concatenation of the operands and is left unspecified.
-class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+// Same as N3VX except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
let Inst{6} = op6;
let Inst{4} = op4;
}
@@ -1337,29 +1364,37 @@ class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4,
// NEON VMOVs between scalar and core registers.
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : AI<oops, iops, f, itin, opc, asm, pattern> {
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
+ "", itin> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{6-5} = opcod3;
let Inst{4} = 1;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(
+ !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
+ !strconcat("\t", asm));
+ let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string dt, string asm, list<dag> pattern>
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin,
- opc, asm, pattern>;
+ opc, dt, asm, pattern>;
class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string dt, string asm, list<dag> pattern>
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin,
- opc, asm, pattern>;
+ opc, dt, asm, pattern>;
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string dt, string asm, list<dag> pattern>
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
- opc, asm, pattern>;
+ opc, dt, asm, pattern>;
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
// for single-precision FP.
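For context: NeonI now assembles its AsmString from the mnemonic, the predicate suffix, and a data-type suffix. Schematically, with hypothetical field values:

    // opc = "vadd", dt = "i32", asm = "$dst, $src1, $src2"
    //   => AsmString "vadd${p}.i32\t$dst, $src1, $src2"
    // so a predicated instruction prints as, e.g., "vaddeq.i32 d0, d1, d2".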
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 79bde29..7516d3c 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -116,6 +116,10 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -204,7 +208,7 @@ def hi16 : SDNodeXForm<imm, [{
def lo16AllZero : PatLeaf<(i32 imm), [{
// Returns true if all low 16-bits are 0.
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
- }], hi16>;
+}], hi16>;
/// imm0_65535 predicate - True if the 32-bit immediate is in the range
/// [0.65535].
@@ -284,6 +288,22 @@ def so_imm2part_2 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+def so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue());
+ }]> {
+ let PrintMethod = "printSOImm2PartOperand";
+}
+
+def so_neg_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartFirst(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def so_neg_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartSecond(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
return (int32_t)N->getZExtValue() < 32;
@@ -568,12 +588,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-def DWARF_LOC :
-PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary,
- ".loc $file, $line, $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
-
-
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
@@ -581,25 +595,24 @@ def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
[(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
-let canFoldAsLoad = 1 in
def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
[(set GPR:$dst, (load addrmodepc:$addr))]>;
def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
@@ -801,13 +814,14 @@ let isBranch = 1, isTerminator = 1 in {
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ mayHaveSideEffects = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr", []>;
@@ -992,7 +1006,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
let Constraints = "$src = $dst" in
def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
DPFrm, IIC_iMOVi,
- "movt", "\t$dst, $imm",
+ "movt", "\t$dst, $imm",
[(set GPR:$dst,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
@@ -1593,12 +1607,6 @@ let Defs =
// Non-Instruction Patterns
//
-// ConstantPool, GlobalAddress, and JumpTable
-def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
-def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
-def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (LEApcrelJT tjumptable:$dst, imm:$id)>;
-
// Large immediate handling.
// Two piece so_imms.
@@ -1618,9 +1626,9 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS),
(ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
(so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS),
- (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
+ (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)),
+ (so_neg_imm2part_2 imm:$RHS))>;
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction, the benefit is that it can be remat'd
@@ -1628,10 +1636,19 @@ def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS),
// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+ "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>,
Requires<[IsARM, HasV6T2]>;
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
// TODO: add,sub,and, 3-instr forms?
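The reordered wrapper patterns rest on two facts: movw writes the low halfword and zeroes the upper half, while movt writes the upper halfword and preserves the lower one, so the rematerializable MOVi32imm pseudo builds any 32-bit value in two instructions with no constant-pool load; and the UseMovt/DontUseMovt predicates keep the two tglobaladdr patterns mutually exclusive per subtarget. A stripped-down sketch of that gating idiom, with a hypothetical wrapper node standing in for ARMWrapper:

// Illustrative only: one node, two mutually exclusive selections.
def : Pat<(MyWrapper tglobaladdr:$g), (LEApcrel tglobaladdr:$g)>,
      Requires<[IsARM, DontUseMovt]>;  // PC-relative address computation
def : Pat<(MyWrapper tglobaladdr:$g), (MOVi32imm tglobaladdr:$g)>,
      Requires<[IsARM, UseMovt]>;      // movw/movt pair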
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e1353b7..3166931 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -124,7 +124,7 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
IIC_fpLoadm,
- "vldm${addr:submode} ${addr:base}, $dst1",
+ "vldm", "${addr:submode} ${addr:base}, $dst1",
[]> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
@@ -134,7 +134,7 @@ def VLDMD : NI<(outs),
def VLDMS : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
IIC_fpLoadm,
- "vldm${addr:submode} ${addr:base}, $dst1",
+ "vldm", "${addr:submode} ${addr:base}, $dst1",
[]> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
@@ -146,7 +146,7 @@ def VLDMS : NI<(outs),
// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
IIC_fpLoadm,
- "vldmia\t$addr, ${dst:dregpair}",
+ "vldmia", "$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -158,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
IIC_fpStorem,
- "vstmia\t$addr, ${src:dregpair}",
+ "vstmia", "$addr, ${src:dregpair}",
[(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -168,178 +168,221 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
}
// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
+ OpcodeStr, Dt, "\\{$dst\\}, $addr", "",
[(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
+ OpcodeStr, Dt, "${dst:dregpair}, $addr", "",
[(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>;
-def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>;
-def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>;
-def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>;
-def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>;
+def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>;
+def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>;
+def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>;
+def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>;
+def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>;
-def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>;
-def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>;
-def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>;
-def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>;
-def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>;
+def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>;
+def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>;
+def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
+def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
+def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op7_4, string OpcodeStr>
+class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD2,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
-class VLD2Q<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$dst1,$dst2\\}, $addr", "", []>;
+class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0011,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD2,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
"", []>;
-def VLD2d8 : VLD2D<0b0000, "vld2.8">;
-def VLD2d16 : VLD2D<0b0100, "vld2.16">;
-def VLD2d32 : VLD2D<0b1000, "vld2.32">;
+def VLD2d8 : VLD2D<0b0000, "vld2", "8">;
+def VLD2d16 : VLD2D<0b0100, "vld2", "16">;
+def VLD2d32 : VLD2D<0b1000, "vld2", "32">;
def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD1,
- "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>;
+ "vld1", "64", "\\{$dst1,$dst2\\}, $addr", "", []>;
-def VLD2q8 : VLD2Q<0b0000, "vld2.8">;
-def VLD2q16 : VLD2Q<0b0100, "vld2.16">;
-def VLD2q32 : VLD2Q<0b1000, "vld2.32">;
+def VLD2q8 : VLD2Q<0b0000, "vld2", "8">;
+def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
+def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;
// VLD3 : Vector Load (multiple 3-element structures)
-class VLD3D<bits<4> op7_4, string OpcodeStr>
+class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD3,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;
-class VLD3WB<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
+class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr), IIC_VLD3,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr",
"$addr.addr = $wb", []>;
-def VLD3d8 : VLD3D<0b0000, "vld3.8">;
-def VLD3d16 : VLD3D<0b0100, "vld3.16">;
-def VLD3d32 : VLD3D<0b1000, "vld3.32">;
+def VLD3d8 : VLD3D<0b0000, "vld3", "8">;
+def VLD3d16 : VLD3D<0b0100, "vld3", "16">;
+def VLD3d32 : VLD3D<0b1000, "vld3", "32">;
def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD1,
- "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
+ "vld1", "64", "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
// vld3 to double-spaced even registers.
-def VLD3q8a : VLD3WB<0b0000, "vld3.8">;
-def VLD3q16a : VLD3WB<0b0100, "vld3.16">;
-def VLD3q32a : VLD3WB<0b1000, "vld3.32">;
+def VLD3q8a : VLD3WB<0b0000, "vld3", "8">;
+def VLD3q16a : VLD3WB<0b0100, "vld3", "16">;
+def VLD3q32a : VLD3WB<0b1000, "vld3", "32">;
// vld3 to double-spaced odd registers.
-def VLD3q8b : VLD3WB<0b0000, "vld3.8">;
-def VLD3q16b : VLD3WB<0b0100, "vld3.16">;
-def VLD3q32b : VLD3WB<0b1000, "vld3.32">;
+def VLD3q8b : VLD3WB<0b0000, "vld3", "8">;
+def VLD3q16b : VLD3WB<0b0100, "vld3", "16">;
+def VLD3q32b : VLD3WB<0b1000, "vld3", "32">;
// VLD4 : Vector Load (multiple 4-element structures)
-class VLD4D<bits<4> op7_4, string OpcodeStr>
+class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0000,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD4,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
"", []>;
-class VLD4WB<bits<4> op7_4, string OpcodeStr>
+class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0001,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr), IIC_VLD4,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
"$addr.addr = $wb", []>;
-def VLD4d8 : VLD4D<0b0000, "vld4.8">;
-def VLD4d16 : VLD4D<0b0100, "vld4.16">;
-def VLD4d32 : VLD4D<0b1000, "vld4.32">;
+def VLD4d8 : VLD4D<0b0000, "vld4", "8">;
+def VLD4d16 : VLD4D<0b0100, "vld4", "16">;
+def VLD4d32 : VLD4D<0b1000, "vld4", "32">;
def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD1,
- "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;
+ "vld1", "64", "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;
// vld4 to double-spaced even registers.
-def VLD4q8a : VLD4WB<0b0000, "vld4.8">;
-def VLD4q16a : VLD4WB<0b0100, "vld4.16">;
-def VLD4q32a : VLD4WB<0b1000, "vld4.32">;
+def VLD4q8a : VLD4WB<0b0000, "vld4", "8">;
+def VLD4q16a : VLD4WB<0b0100, "vld4", "16">;
+def VLD4q32a : VLD4WB<0b1000, "vld4", "32">;
// vld4 to double-spaced odd registers.
-def VLD4q8b : VLD4WB<0b0000, "vld4.8">;
-def VLD4q16b : VLD4WB<0b0100, "vld4.16">;
-def VLD4q32b : VLD4WB<0b1000, "vld4.32">;
+def VLD4q8b : VLD4WB<0b0000, "vld4", "8">;
+def VLD4q16b : VLD4WB<0b0100, "vld4", "16">;
+def VLD4q32b : VLD4WB<0b1000, "vld4", "32">;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
// VLD2LN : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2,
- !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
- "$src1 = $dst1, $src2 = $dst2", []>;
-
-def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">;
-def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">;
+class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2,
+ OpcodeStr, Dt, "\\{$dst1[$lane],$dst2[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2", []>;
+
+// vld2 to single-spaced registers.
+def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">;
+def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> {
+ let Inst{5} = 0;
+}
+def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> {
+ let Inst{6} = 0;
+}
// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">;
-def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">;
+def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> {
+ let Inst{5} = 1;
+}
+def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> {
+ let Inst{6} = 1;
+}
// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">;
-def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">;
+def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> {
+ let Inst{5} = 1;
+}
+def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> {
+ let Inst{6} = 1;
+}
// VLD3LN : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3,
- !strconcat(OpcodeStr,
- "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-
-def VLD3LNd8 : VLD3LN<0b0010, "vld3.8">;
-def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">;
-def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">;
+class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3,
+ OpcodeStr, Dt,
+ "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+// vld3 to single-spaced registers.
+def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> {
+ let Inst{4} = 0;
+}
+def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> {
+ let Inst{5-4} = 0b00;
+}
+def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> {
+ let Inst{6-4} = 0b000;
+}
// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3.16">;
-def VLD3LNq32a: VLD3LN<0b1010, "vld3.32">;
+def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3.16">;
-def VLD3LNq32b: VLD3LN<0b1010, "vld3.32">;
+def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// VLD4LN : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b10,op11_8,0b0000,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4,
- !strconcat(OpcodeStr,
- "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
-
-def VLD4LNd8 : VLD4LN<0b0011, "vld4.8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4.16">;
-def VLD4LNd32 : VLD4LN<0b1011, "vld4.32">;
+class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b10,op11_8,{?,?,?,?},
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4,
+ OpcodeStr, Dt,
+ "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+// vld4 to single-spaced registers.
+def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">;
+def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> {
+ let Inst{5} = 0;
+}
+def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> {
+ let Inst{6} = 0;
+}
// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4.16">;
-def VLD4LNq32a: VLD4LN<0b1011, "vld4.32">;
+def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> {
+ let Inst{5} = 1;
+}
+def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> {
+ let Inst{6} = 1;
+}
// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4.16">;
-def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
+def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> {
+ let Inst{5} = 1;
+}
+def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> {
+ let Inst{6} = 1;
+}
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -349,178 +392,221 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VST1D<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
+ OpcodeStr, Dt, "\\{$src\\}, $addr", "",
[(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
- !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
+ OpcodeStr, Dt, "${src:dregpair}, $addr", "",
[(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
let hasExtraSrcRegAllocReq = 1 in {
-def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>;
-def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>;
-def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>;
-def VST1df : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>;
-def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>;
-
-def VST1q8 : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>;
-def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>;
-def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>;
-def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>;
-def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>;
+def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>;
+def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>;
+def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>;
+def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>;
+def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>;
+
+def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>;
+def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>;
+def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>;
+def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
+def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op7_4, string OpcodeStr>
+class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b1000,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;
-class VST2Q<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$src1,$src2\\}, $addr", "", []>;
+class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0011,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
"", []>;
-def VST2d8 : VST2D<0b0000, "vst2.8">;
-def VST2d16 : VST2D<0b0100, "vst2.16">;
-def VST2d32 : VST2D<0b1000, "vst2.32">;
+def VST2d8 : VST2D<0b0000, "vst2", "8">;
+def VST2d16 : VST2D<0b0100, "vst2", "16">;
+def VST2d32 : VST2D<0b1000, "vst2", "32">;
def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>;
+ "vst1", "64", "\\{$src1,$src2\\}, $addr", "", []>;
-def VST2q8 : VST2Q<0b0000, "vst2.8">;
-def VST2q16 : VST2Q<0b0100, "vst2.16">;
-def VST2q32 : VST2Q<0b1000, "vst2.32">;
+def VST2q8 : VST2Q<0b0000, "vst2", "8">;
+def VST2q16 : VST2Q<0b0100, "vst2", "16">;
+def VST2q32 : VST2Q<0b1000, "vst2", "32">;
// VST3 : Vector Store (multiple 3-element structures)
-class VST3D<bits<4> op7_4, string OpcodeStr>
+class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0100,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;
-class VST3WB<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "", []>;
+class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr",
"$addr.addr = $wb", []>;
-def VST3d8 : VST3D<0b0000, "vst3.8">;
-def VST3d16 : VST3D<0b0100, "vst3.16">;
-def VST3d32 : VST3D<0b1000, "vst3.32">;
+def VST3d8 : VST3D<0b0000, "vst3", "8">;
+def VST3d16 : VST3D<0b0100, "vst3", "16">;
+def VST3d32 : VST3D<0b1000, "vst3", "32">;
def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
IIC_VST,
- "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>;
+ "vst1", "64", "\\{$src1,$src2,$src3\\}, $addr", "", []>;
// vst3 to double-spaced even registers.
-def VST3q8a : VST3WB<0b0000, "vst3.8">;
-def VST3q16a : VST3WB<0b0100, "vst3.16">;
-def VST3q32a : VST3WB<0b1000, "vst3.32">;
+def VST3q8a : VST3WB<0b0000, "vst3", "8">;
+def VST3q16a : VST3WB<0b0100, "vst3", "16">;
+def VST3q32a : VST3WB<0b1000, "vst3", "32">;
// vst3 to double-spaced odd registers.
-def VST3q8b : VST3WB<0b0000, "vst3.8">;
-def VST3q16b : VST3WB<0b0100, "vst3.16">;
-def VST3q32b : VST3WB<0b1000, "vst3.32">;
+def VST3q8b : VST3WB<0b0000, "vst3", "8">;
+def VST3q16b : VST3WB<0b0100, "vst3", "16">;
+def VST3q32b : VST3WB<0b1000, "vst3", "32">;
// VST4 : Vector Store (multiple 4-element structures)
-class VST4D<bits<4> op7_4, string OpcodeStr>
+class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0000,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
"", []>;
-class VST4WB<bits<4> op7_4, string OpcodeStr>
+class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
"$addr.addr = $wb", []>;
-def VST4d8 : VST4D<0b0000, "vst4.8">;
-def VST4d16 : VST4D<0b0100, "vst4.16">;
-def VST4d32 : VST4D<0b1000, "vst4.32">;
+def VST4d8 : VST4D<0b0000, "vst4", "8">;
+def VST4d16 : VST4D<0b0100, "vst4", "16">;
+def VST4d32 : VST4D<0b1000, "vst4", "32">;
def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
DPR:$src4), IIC_VST,
- "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;
+ "vst1", "64", "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;
// vst4 to double-spaced even registers.
-def VST4q8a : VST4WB<0b0000, "vst4.8">;
-def VST4q16a : VST4WB<0b0100, "vst4.16">;
-def VST4q32a : VST4WB<0b1000, "vst4.32">;
+def VST4q8a : VST4WB<0b0000, "vst4", "8">;
+def VST4q16a : VST4WB<0b0100, "vst4", "16">;
+def VST4q32a : VST4WB<0b1000, "vst4", "32">;
// vst4 to double-spaced odd registers.
-def VST4q8b : VST4WB<0b0000, "vst4.8">;
-def VST4q16b : VST4WB<0b0100, "vst4.16">;
-def VST4q32b : VST4WB<0b1000, "vst4.32">;
+def VST4q8b : VST4WB<0b0000, "vst4", "8">;
+def VST4q16b : VST4WB<0b0100, "vst4", "16">;
+def VST4q32b : VST4WB<0b1000, "vst4", "32">;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
// VST2LN : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b00,op11_8,0b0000, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
- "", []>;
-
-def VST2LNd8 : VST2LN<0b0001, "vst2.8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2.16">;
-def VST2LNd32 : VST2LN<0b1001, "vst2.32">;
+class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VST,
+ OpcodeStr, Dt, "\\{$src1[$lane],$src2[$lane]\\}, $addr",
+ "", []>;
+
+// vst2 to single-spaced registers.
+def VST2LNd8 : VST2LN<0b0001, "vst2", "8">;
+def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> {
+ let Inst{5} = 0;
+}
+def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> {
+ let Inst{6} = 0;
+}
// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2.16">;
-def VST2LNq32a: VST2LN<0b1001, "vst2.32">;
+def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> {
+ let Inst{5} = 1;
+}
+def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> {
+ let Inst{6} = 1;
+}
// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2.16">;
-def VST2LNq32b: VST2LN<0b1001, "vst2.32">;
+def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> {
+ let Inst{5} = 1;
+}
+def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> {
+ let Inst{6} = 1;
+}
// VST3LN : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b00,op11_8,0b0000, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST,
- !strconcat(OpcodeStr,
- "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;
-
-def VST3LNd8 : VST3LN<0b0010, "vst3.8">;
-def VST3LNd16 : VST3LN<0b0110, "vst3.16">;
-def VST3LNd32 : VST3LN<0b1010, "vst3.32">;
+class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST,
+ OpcodeStr, Dt,
+ "\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr", "", []>;
+
+// vst3 to single-spaced registers.
+def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> {
+ let Inst{4} = 0;
+}
+def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> {
+ let Inst{5-4} = 0b00;
+}
+def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> {
+ let Inst{6-4} = 0b000;
+}
// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3.16">;
-def VST3LNq32a: VST3LN<0b1010, "vst3.32">;
+def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3.16">;
-def VST3LNq32b: VST3LN<0b1010, "vst3.32">;
+def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// VST4LN : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b00,op11_8,0b0000, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST,
- !strconcat(OpcodeStr,
- "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
- "", []>;
-
-def VST4LNd8 : VST4LN<0b0011, "vst4.8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4.16">;
-def VST4LNd32 : VST4LN<0b1011, "vst4.32">;
+class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST,
+ OpcodeStr, Dt,
+ "\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr",
+ "", []>;
+
+// vst4 to single-spaced registers.
+def VST4LNd8 : VST4LN<0b0011, "vst4", "8">;
+def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> {
+ let Inst{5} = 0;
+}
+def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> {
+ let Inst{6} = 0;
+}
// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4.16">;
-def VST4LNq32a: VST4LN<0b1011, "vst4.32">;
+def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> {
+ let Inst{5} = 1;
+}
+def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> {
+ let Inst{6} = 1;
+}
// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4.16">;
-def VST4LNq32b: VST4LN<0b1011, "vst4.32">;
+def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> {
+ let Inst{5} = 1;
+}
+def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> {
+ let Inst{6} = 1;
+}
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
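The {?,?,?,?} wildcard in the rewritten lane classes is what makes the per-def overrides above work: the class leaves those encoding bits unbound, and each def pins only the bits that distinguish it, the single-spaced variants clearing the register-spacing bit and the double-spaced variants setting it. A self-contained sketch of the idiom, using made-up bit positions rather than the real NLdSt encoding:

// Illustrative only: fix some bits in the class, leave the rest '?'.
class LaneLdSt<bits<4> op7_4> {
  bits<8> Inst;            // unassigned bits stay unknown ('?')
  let Inst{7-4} = op7_4;   // field supplied by the class parameter
}
def LaneSingle : LaneLdSt<0b0101> { let Inst{1} = 0; } // single-spaced
def LaneDouble : LaneLdSt<0b0101> { let Inst{1} = 1; } // double-spaced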
@@ -570,25 +656,25 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
// Basic 2-register operations, both double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
// Basic 2-register operations, scalar single-precision.
class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
- IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
: NEONFPPat<(ResTy (OpNode SPR:$a)),
@@ -599,27 +685,27 @@ class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
// Basic 2-register intrinsics, scalar single-precision
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
- !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+ OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VDIntsPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a)),
@@ -630,49 +716,62 @@ class N2VDIntsPat<SDNode OpNode, NeonI Inst>
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyD, ValueType TyQ, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
- (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
// Long 2-register intrinsics (currently only used for VMOVL).
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
- (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
-class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
(ins DPR:$src1, DPR:$src2), IIC_VPERMD,
- !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+ OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
- InstrItinClass itin, string OpcodeStr>
+ InstrItinClass itin, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
(ins QPR:$src1, QPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+ OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
// Basic 3-register operations, both double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
- InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -680,11 +779,11 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
IIC_VMULi16D,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_8:$src2),
@@ -693,20 +792,31 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -714,11 +824,12 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
IIC_VMULi16Q,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -728,11 +839,11 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
// Basic 3-register operations, scalar single-precision
class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
+ OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
let isCommutable = Commutable;
}
class N3VDsPat<SDNode OpNode, NeonI Inst>
@@ -744,19 +855,20 @@ class N3VDsPat<SDNode OpNode, NeonI Inst>
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -764,10 +876,10 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_8:$src2),
@@ -776,19 +888,21 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -796,10 +910,11 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -809,30 +924,32 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
// Multiply-Add/Sub operations, both double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (Ty (OpNode DPR:$src1,
(Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$src2,
(Ty (NEONvduplane (Ty DPR_VFP2:$src3),
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$src2,
@@ -840,32 +957,33 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType Ty,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (Ty (OpNode QPR:$src1,
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
@@ -874,12 +992,12 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
// Multiply-Add/Sub operations, scalar single-precision
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR_VFP2:$dst),
(ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
@@ -892,50 +1010,51 @@ class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
(OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$src2),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$src2),
@@ -945,40 +1064,41 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType TyD, ValueType TyQ,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
let isCommutable = Commutable;
}
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -986,128 +1106,135 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin
// Wide 3-register intrinsics.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType TyQ, ValueType TyD,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
}
// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
- !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
- !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
(outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
(i32 imm:$SIMM))))]>;
// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
(outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
(i32 imm:$SIMM))))]>;
// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
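// For these conversions the Dt string is expected to carry both element types
// at once, so a fixed-point instantiation would look roughly like (opcode
// bits and record name illustrative):
//   def VCVTxsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
//                         v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
// printing as "vcvt.s32.f32\t$dst, $src, $SIMM" with the fraction-bit count
// in $SIMM.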
//===----------------------------------------------------------------------===//
@@ -1126,41 +1253,55 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
// 64-bit vector types.
def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "16"), v4i16, v4i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
- !strconcat(OpcodeStr, "32"), v2i32, v2i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
// 128-bit vector types.
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "16"), v8i16, v8i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
- !strconcat(OpcodeStr, "32"), v4i32, v4i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
}
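// A defm built on this multiclass expands to one record per vector type, with
// the size digit appended to Dt rather than to the mnemonic; for instance
// (see VADD below)
//   defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
//                        add, 1>;
// yields VADDv8i8 ... VADDv2i64 carrying Dt strings "i8" ... "i64".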
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+ v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+ v4i32, v2i32, ShOp>;
}
// ....then also with element size of 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, SDNode OpNode, bit Commutable = 0>
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
: N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
- OpcodeStr, OpNode, Commutable> {
+ OpcodeStr, Dt, OpNode, Commutable> {
def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
- !strconcat(OpcodeStr, "64"), v1i64, v1i64, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
- !strconcat(OpcodeStr, "64"), v2i64, v2i64, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
}
@@ -1168,27 +1309,30 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp> {
def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
- itin, !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
- itin, !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
- itin, !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
}
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
@@ -1198,66 +1342,85 @@ multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
// 64-bit vector types.
- def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16, !strconcat(OpcodeStr,"16"),
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, IntOp, Commutable>;
- def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32, !strconcat(OpcodeStr,"32"),
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, IntOp, Commutable>;
// 128-bit vector types.
- def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16, !strconcat(OpcodeStr,"16"),
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, IntOp, Commutable>;
- def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32, !strconcat(OpcodeStr,"32"),
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp> {
- def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
- def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
- def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
- def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
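// The "SL" (scalar, i.e. by-lane) variants match an operand that is a
// NEONvduplane splat of one element, so a pattern of the shape
//   (IntOp QPR:$src1, (NEONvduplane DPR_8:$src2, imm:$lane))
// selects the by-lane encoding and prints that operand as "$src2[$lane]".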
// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
: N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
- OpcodeStr, IntOp, Commutable> {
+ OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
}
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
: N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
- OpcodeStr, IntOp, Commutable> {
+ OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
- !strconcat(OpcodeStr,"64"), v1i64, v1i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
- !strconcat(OpcodeStr,"64"), v2i64, v2i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
}
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
- def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr,"16"),
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp, Commutable>;
- def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"32"),
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
v4i16, v4i32, IntOp, Commutable>;
- def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"64"),
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
v2i32, v2i64, IntOp, Commutable>;
}
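// Note that for the narrowing intrinsics the size appended to Dt is the
// *source* element width: the v8i8 def above prints as e.g. "vaddhn.i16",
// narrowing 16-bit Q-register elements into an 8-bit D-register result.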
@@ -1266,41 +1429,50 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0> {
def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
- !strconcat(OpcodeStr,"16"), v4i32, v4i16, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
- !strconcat(OpcodeStr,"32"), v2i64, v2i32, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
}
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
- InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> {
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt,
+ IntOp, Commutable> {
def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
}
// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
+ def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
+ def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, IntOp, Commutable>;
}
@@ -1310,57 +1482,57 @@ multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, SDNode OpNode> {
+ string OpcodeStr, string Dt, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
- !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
// 128-bit vector types.
def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
- !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDNode ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
- !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
- !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
- !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
- !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
}
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
- !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
- !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
- !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
@@ -1368,27 +1540,27 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp>
- : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> {
def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
@@ -1397,22 +1569,22 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinD, !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinD, !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinD, !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinQ, !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinQ, !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinQ, !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
@@ -1420,22 +1592,22 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
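// Note the result/operand type pairs above: pairwise-long operations halve
// the element count while doubling the element width (v8i8 -> v4i16, ...,
// v4i32 -> v2i64), which is how a D input can still produce a D result.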
@@ -1443,61 +1615,62 @@ multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon 2-register vector shift by immediate,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "8"), v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "16"), v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "32"), v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin,
- !strconcat(OpcodeStr, "64"), v1i64, OpNode>;
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "8"), v16i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "16"), v8i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "32"), v4i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin,
- !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
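// The let Inst{...} overrides above pin the element size: in these
// shift-by-immediate encodings the size is selected by the position of the
// leading 1 in imm6 (with op7 set for 64-bit elements), and the remaining
// low "x" bits hold the encoded shift amount.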
@@ -1505,39 +1678,39 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDNode ShOp> {
// 64-bit vector types.
def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v8i8, ShOp> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v4i16, ShOp> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v2i32, ShOp> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v1i64, ShOp>;
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v16i8, ShOp> {
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v8i16, ShOp> {
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v4i32, ShOp> {
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
// imm6 = xxxxxx
}
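// A typical user would be the shift-right-accumulate instructions; under the
// new scheme an instantiation is assumed to look like (opcode bits
// illustrative):
//   defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
// giving "vsra.s8" ... "vsra.s64" with the accumulator tied by "$src1 = $dst".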
@@ -1548,53 +1721,53 @@ multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, SDNode ShOp> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v8i8, ShOp> {
+ OpcodeStr, "8", v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v4i16, ShOp> {
+ OpcodeStr, "16", v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v2i32, ShOp> {
+ OpcodeStr, "32", v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v1i64, ShOp>;
+ OpcodeStr, "64", v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v16i8, ShOp> {
+ OpcodeStr, "8", v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v8i16, ShOp> {
+ OpcodeStr, "16", v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v4i32, ShOp> {
+ OpcodeStr, "32", v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
+ OpcodeStr, "64", v2i64, ShOp>;
// imm6 = xxxxxx
}
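// Here the Dt strings are the bare element sizes because the shift-and-insert
// instructions (VSLI/VSRI) are bitwise: they take no s/u/i type letter, so
// the printed forms are "vsli.8" ... "vsli.64" rather than "vsli.i8" etc.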
// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
- bit op4, string OpcodeStr, SDNode OpNode> {
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -1602,18 +1775,18 @@ multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
- bit op4, InstrItinClass itin, string OpcodeStr,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- !strconcat(OpcodeStr, "16"), v8i8, v8i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- !strconcat(OpcodeStr, "32"), v4i16, v4i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- !strconcat(OpcodeStr, "64"), v2i32, v2i64, OpNode> {
+ OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -1625,49 +1798,58 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
-defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>;
-def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>;
-def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>;
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s",
+ int_arm_neon_vaddls, 1>;
+defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
+ int_arm_neon_vaddlu, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
+defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>;
+ IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>;
+ IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>;
+ IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
+ IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>;
+ IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>;
+ IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
-defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
-defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
-defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q,
- IIC_VMULi32Q, "vmul.i", mul, 1>;
-def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8,
- int_arm_neon_vmulp, 1>;
-def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8,
- int_arm_neon_vmulp, 1>;
-def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>;
-def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>;
-def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
-def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>;
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
+ v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
+ v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32",
+               v4f32, v2f32, fmul>;
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
@@ -1690,66 +1872,80 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqdmulh.s", int_arm_neon_vqdmulh>;
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
(v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
(v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqrdmulh.s", int_arm_neon_vqrdmulh>;
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
(v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
(v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
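// These patterns retarget a lane taken from a Q register onto the D-register
// form of the instruction: DSubReg_i32_reg selects the D subregister holding
// the lane (lane/2 for 32-bit elements) and SubReg_i32_lane the index within
// it (lane%2), so e.g. lane 3 of a v4i32 becomes lane 1 of the odd D half.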
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8,
- int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>;
+defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s",
+ int_arm_neon_vmulls, 1>;
+defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u",
+ int_arm_neon_vmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
+ int_arm_neon_vmulls>;
+defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
+ int_arm_neon_vmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
-defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>;
-defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>;
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s",
+ int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s",
+ int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
-def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
-def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul, fadd>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul, fadd>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
-def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
-def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul, fadd>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul, fadd>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -1766,7 +1962,7 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1),
(v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
@@ -1779,25 +1975,30 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
-defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s",
+ int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
-def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
-def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul, fsub>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul, fsub>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
-def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
-def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul, fsub>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul, fsub>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -1810,7 +2011,7 @@ def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
(mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
(v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
@@ -1819,7 +2020,7 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(fmul (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLSslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
@@ -1827,146 +2028,170 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
-defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s",
+ int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
-defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>;
-def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>;
-def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>;
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s",
+ int_arm_neon_vsubls, 1>;
+defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
+ int_arm_neon_vsublu, 1>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
+defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
-defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>;
-defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>;
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
-defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>;
-defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>;
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
-defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
// Vector Comparisons.
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vceq.i", NEONvceq, 1>;
-def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
-def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32", v4i32, v4f32, NEONvceq, 1>;
+ IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcge.s", NEONvcge, 0>;
+ IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>;
-def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
-def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32", v4i32, v4f32, NEONvcge, 0>;
+ IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
+ v2i32, v2f32, NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>;
+ IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>;
-def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
-def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;
+ IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32,
- int_arm_neon_vacged, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32,
- int_arm_neon_vacgeq, 0>;
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
+ v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32",
+ v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32,
- int_arm_neon_vacgtd, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32,
- int_arm_neon_vacgtq, 0>;
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32",
+ v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32",
+ v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vtst.i", NEONvtst, 1>;
+ IIC_VBINi4Q, "vtst", "i", NEONvtst, 1>;
// Vector Bitwise Operations.
// VAND : Vector Bitwise AND
-def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>;
-def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", v4i32, v4i32, and, 1>;
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
// VEOR : Vector Bitwise Exclusive OR
-def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", v2i32, v2i32, xor, 1>;
-def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", v4i32, v4i32, xor, 1>;
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
// VORR : Vector Bitwise OR
-def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", v2i32, v2i32, or, 1>;
-def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>;
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
// VBIC : Vector Bitwise Bit Clear (AND NOT)
-def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vbic\t$dst, $src1, $src2", "",
+ "vbic", "$dst, $src1, $src2", "",
[(set DPR:$dst, (v2i32 (and DPR:$src1,
(vnot_conv DPR:$src2))))]>;
-def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vbic\t$dst, $src1, $src2", "",
+ "vbic", "$dst, $src1, $src2", "",
[(set QPR:$dst, (v4i32 (and QPR:$src1,
(vnot_conv QPR:$src2))))]>;
// VORN : Vector Bitwise OR NOT
-def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vorn\t$dst, $src1, $src2", "",
+ "vorn", "$dst, $src1, $src2", "",
[(set DPR:$dst, (v2i32 (or DPR:$src1,
(vnot_conv DPR:$src2))))]>;
-def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vorn\t$dst, $src1, $src2", "",
+ "vorn", "$dst, $src1, $src2", "",
[(set QPR:$dst, (v4i32 (or QPR:$src1,
(vnot_conv QPR:$src2))))]>;
// VMVN : Vector Bitwise NOT
-def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
- "vmvn\t$dst, $src", "",
+ "vmvn", "$dst, $src", "",
[(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
-def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
(outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
- "vmvn\t$dst, $src", "",
+ "vmvn", "$dst, $src", "",
[(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
-def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
- "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst,
(v2i32 (or (and DPR:$src2, DPR:$src1),
(and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
-def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ,
- "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(v4i32 (or (and QPR:$src2, QPR:$src1),
(and QPR:$src3, (vnot_conv QPR:$src1)))))]>;
// VBIF : Vector Bitwise Insert if False
-// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// VBIT : Vector Bitwise Insert if True
-// like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.
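The change running through this hunk is uniform: each hard-coded mnemonic such as "vcgt.s" is split into an opcode string ("vcgt") and a data-type suffix ("s") passed as separate template arguments. As a rough illustration of what a format class can do with the two pieces (class and field names below are invented; the real plumbing lives in ARMInstrFormats.td, outside this diff):

    // Minimal sketch: rebuild "opc.dt\toperands" from the split strings.
    class NeonDtSketch<string opc, string dt, string asm> {
      string AsmString = !strconcat(opc, !strconcat(".",
                         !strconcat(dt, !strconcat("\t", asm))));
    }
    // NeonDtSketch<"vcgt", "s8", "$dst, $src1, $src2"> yields
    //   "vcgt.s8\t$dst, $src1, $src2"

Keeping the suffix separate presumably also lets a predicate token be printed between the opcode and the ".s8" part, which a single fused string cannot do.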
@@ -1974,259 +2199,270 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
-defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>;
-defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>;
-def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32,
- int_arm_neon_vabds, 0>;
-def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32,
- int_arm_neon_vabds, 0>;
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vabd", "s", int_arm_neon_vabds, 0>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 0>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q,
+ "vabdl", "s", int_arm_neon_vabdls, 0>;
+defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q,
+ "vabdl", "u", int_arm_neon_vabdlu, 0>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;
+defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>;
+defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
+defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>;
+defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>;
+ IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32,
- int_arm_neon_vmaxs, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32,
- int_arm_neon_vmaxs, 1>;
+ IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>;
+ IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32,
- int_arm_neon_vmins, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32,
- int_arm_neon_vmins, 1>;
+ IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8,
- int_arm_neon_vpadd, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16,
- int_arm_neon_vpadd, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32,
- int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32,
- int_arm_neon_vpadd, 0>;
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
-defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
int_arm_neon_vpaddls>;
-defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
int_arm_neon_vpaddlu>;
// VPADAL : Vector Pairwise Add and Accumulate Long
-defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
int_arm_neon_vpadals>;
-defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8,
- int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16,
- int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32,
- int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8,
- int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16,
- int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32,
- int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32,
- int_arm_neon_vpmaxs, 0>;
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8",
+ v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16",
+ v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32",
+ v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8",
+ v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16",
+ v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32",
+ v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32",
+ v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8,
- int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16,
- int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32,
- int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8,
- int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16,
- int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32,
- int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32,
- int_arm_neon_vpmins, 0>;
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8",
+ v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16",
+ v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32",
+ v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8",
+ v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16",
+ v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32",
+ v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32",
+ v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
- IIC_VUNAD, "vrecpe.u32",
+ IIC_VUNAD, "vrecpe", "u32",
v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
- IIC_VUNAQ, "vrecpe.u32",
+ IIC_VUNAQ, "vrecpe", "u32",
v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
- IIC_VUNAD, "vrecpe.f32",
+ IIC_VUNAD, "vrecpe", "f32",
v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
- IIC_VUNAQ, "vrecpe.f32",
+ IIC_VUNAQ, "vrecpe", "f32",
v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
-def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32,
- int_arm_neon_vrecps, 1>;
-def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32,
- int_arm_neon_vrecps, 1>;
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
- IIC_VUNAD, "vrsqrte.u32",
+ IIC_VUNAD, "vrsqrte", "u32",
v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
- IIC_VUNAQ, "vrsqrte.u32",
+ IIC_VUNAQ, "vrsqrte", "u32",
v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
- IIC_VUNAD, "vrsqrte.f32",
+ IIC_VUNAD, "vrsqrte", "f32",
v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
- IIC_VUNAQ, "vrsqrte.f32",
+ IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
-def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32,
- int_arm_neon_vrsqrts, 1>;
-def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32,
- int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>;
+ IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>;
defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>;
+ IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
-defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>;
// VSHLL : Vector Shift Left Long
-defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>;
-defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>;
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
- bit op6, bit op4, string OpcodeStr, ValueType ResTy,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
ValueType OpTy, SDNode OpNode>
- : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, ResTy, OpTy, OpNode> {
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, OpNode> {
let Inst{21-16} = op21_16;
}
-def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8",
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
v8i16, v8i8, NEONvshlli>;
-def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16",
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
v4i32, v4i16, NEONvshlli>;
-def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, NEONvshlli>;
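Read off the three defs above, the fixed op21_16 value encodes the element size in its middle bits; noting it here since the class body leaves the field opaque:

    // op21_16 = 0b11ss10 for the maximum-shift VSHLL forms:
    //   ss = 00  ->  .i8   (0b110010)
    //   ss = 01  ->  .i16  (0b110110)
    //   ss = 10  ->  .i32  (0b111010)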
// VSHRN : Vector Shift Right and Narrow
-defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>;
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>;
// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>;
+ IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>;
defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>;
+ IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
+defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
-defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i",
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>;
+ IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>;
defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>;
+ IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
+defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
-defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s",
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
NEONvqshrns>;
-defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u",
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
NEONvqshrnu>;
// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
-defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s",
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>;
+ IIC_VSHLi4Q, "vqrshl", "s",
+ int_arm_neon_vqrshifts, 0>;
defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>;
+ IIC_VSHLi4Q, "vqrshl", "u",
+ int_arm_neon_vqrshiftu, 0>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
-defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s",
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
NEONvqrshrns>;
-defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u",
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
NEONvqrshrnu>;
// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
-defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s",
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
NEONvqrshrnsu>;
// VSRA : Vector Shift Right and Accumulate
-defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
-defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
-defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
-defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
// VSLI : Vector Shift Left and Insert
-defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;
+defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>;
// VSRI : Vector Shift Right and Insert
-defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;
+defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>;
// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
- IIC_VUNAiD, IIC_VUNAiQ, "vabs.s",
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs.f32",
+ IIC_VUNAD, "vabs", "f32",
v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs.f32",
+ IIC_VUNAQ, "vabs", "f32",
v4f32, v4f32, int_arm_neon_vabs>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
- IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s",
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
int_arm_neon_vqabs>;
// Vector Negate.
@@ -2234,31 +2470,31 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;
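Both fragments express negation as subtraction from zero; the _bc ("bitconverted") variant matches when the all-zeros vector reaches the sub through a bitconvert. Roughly, the DAG shapes involved (types illustrative only, and the reading of immAllZerosV_bc is an assumption):

    // (sub immAllZerosV, $in)               ; matched by vneg
    // (sub (bitconvert immAllZerosV), $in)  ; matched by vneg_conv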
-class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
- IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
-class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
- IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (vneg QPR:$src)))]>;
// VNEG : Vector Negate
-def VNEGs8d : VNEGD<0b00, "vneg.s8", v8i8>;
-def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
-def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
-def VNEGs8q : VNEGQ<0b00, "vneg.s8", v16i8>;
-def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
-def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
- "vneg.f32\t$dst, $src", "",
+ "vneg", "f32", "$dst, $src", "",
[(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
(outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
- "vneg.f32\t$dst, $src", "",
+ "vneg", "f32", "$dst, $src", "",
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
@@ -2270,35 +2506,35 @@ def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
- IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s",
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
int_arm_neon_vqneg>;
// Vector Bit Counting Operations.
// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
- IIC_VCNTiD, IIC_VCNTiQ, "vcls.s",
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
- IIC_VCNTiD, IIC_VCNTiQ, "vclz.i",
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
- IIC_VCNTiD, "vcnt.8",
+ IIC_VCNTiD, "vcnt", "8",
v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
- IIC_VCNTiQ, "vcnt.8",
+ IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
// Vector Move Operations.
// VMOV : Vector Move (Register)
-def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- IIC_VMOVD, "vmov\t$dst, $src", "", []>;
-def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- IIC_VMOVD, "vmov\t$dst, $src", "", []>;
+def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
+ IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
+ IIC_VMOVD, "vmov", "$dst, $src", "", []>;
// VMOV : Vector Move (Immediate)
@@ -2339,65 +2575,65 @@ def vmovImm64 : PatLeaf<(build_vector), [{
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
(ins h8imm:$SIMM), IIC_VMOVImm,
- "vmov.i8\t$dst, $SIMM", "",
+ "vmov", "i8", "$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
(ins h8imm:$SIMM), IIC_VMOVImm,
- "vmov.i8\t$dst, $SIMM", "",
+ "vmov", "i8", "$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
-def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
(ins h16imm:$SIMM), IIC_VMOVImm,
- "vmov.i16\t$dst, $SIMM", "",
+ "vmov", "i16", "$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
(ins h16imm:$SIMM), IIC_VMOVImm,
- "vmov.i16\t$dst, $SIMM", "",
+ "vmov", "i16", "$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
-def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
(ins h32imm:$SIMM), IIC_VMOVImm,
- "vmov.i32\t$dst, $SIMM", "",
+ "vmov", "i32", "$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
(ins h32imm:$SIMM), IIC_VMOVImm,
- "vmov.i32\t$dst, $SIMM", "",
+ "vmov", "i32", "$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
(ins h64imm:$SIMM), IIC_VMOVImm,
- "vmov.i64\t$dst, $SIMM", "",
+ "vmov", "i64", "$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
(ins h64imm:$SIMM), IIC_VMOVImm,
- "vmov.i64\t$dst, $SIMM", "",
+ "vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
// VMOV : Vector Get Lane (move scalar to ARM core register)
-def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00,
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
imm:$lane))]>;
-def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
imm:$lane))]>;
-def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00,
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
imm:$lane))]>;
-def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
imm:$lane))]>;
-def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
[(set GPR:$dst, (extractelt (v2i32 DPR:$src),
imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
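The lane-access encodings above move from fully literal opcode values such as 0b11100101 to brace lists containing '?'. In TableGen a '?' leaves that bit uninitialized, which is what lets a single def stand for every lane; a self-contained sketch of the mechanism (names invented):

    class EncSketch<bits<8> opcod> {
      bits<8> Inst = opcod;
    }
    // Leaves Inst{1} uninitialized; an encoder can fill it per lane later.
    def LaneAgnostic : EncSketch<{1,1,1,0,0,1,?,1}>;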
@@ -2436,19 +2672,19 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
// VMOV : Vector Set Lane (move ARM core register to scalar)
let Constraints = "$src1 = $dst" in {
-def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
(ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
+ IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
[(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
GPR:$src2, imm:$lane))]>;
-def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
(ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
+ IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
[(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
GPR:$src2, imm:$lane))]>;
-def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
(ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
+ IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
[(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
GPR:$src2, imm:$lane))]>;
}
@@ -2512,55 +2748,57 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
// VDUP : Vector Duplicate (from ARM core register to all elements)
-class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+ IIC_VMOVIS, "vdup", Dt, "$dst, $src",
[(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
-class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+ IIC_VMOVIS, "vdup", Dt, "$dst, $src",
[(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
-def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>;
-def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
-def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>;
-def VDUP8q : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
-def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
-def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+ IIC_VMOVIS, "vdup", "32", "$dst, $src",
[(set DPR:$dst, (v2f32 (NEONvdup
(f32 (bitconvert GPR:$src)))))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+ IIC_VMOVIS, "vdup", "32", "$dst, $src",
[(set QPR:$dst, (v4f32 (NEONvdup
(f32 (bitconvert GPR:$src)))))]>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
-class VDUPLND<string OpcodeStr, ValueType Ty>
- : N2VDup<0b11, 0b11, 0b11000, 0, 0,
+class VDUPLND<bits<2> op19_18, bits<2> op17_16,
+ string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
(outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src[$lane]", "",
[(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
-class VDUPLNQ<string OpcodeStr, ValueType ResTy, ValueType OpTy>
- : N2VDup<0b11, 0b11, 0b11000, 1, 0,
+class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy>
+ : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
(outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src[$lane]", "",
[(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; }
-def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; }
-def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; }
-def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; }
-def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; }
-def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; }
-def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; }
-def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; }
+def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>;
+def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>;
+def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>;
+def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>;
+def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>;
+def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>;
+def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>;
+def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>;
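For cross-checking, the new class parameters pin down exactly the bits the deleted 'let' overrides did; the remaining '?' positions are where the lane index goes:

    // VDUPLN8d  : op19_18 = {?,?}, op17_16 = {?,1}   ==  let Inst{16}    = 1
    // VDUPLN16d : op19_18 = {?,?}, op17_16 = {1,0}   ==  let Inst{17-16} = 0b10
    // VDUPLN32d : op19_18 = {?,1}, op17_16 = {0,0}   ==  let Inst{18-16} = 0b100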
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -2579,19 +2817,15 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0,
- (outs DPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
- [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> {
- let Inst{18-16} = 0b100;
-}
+def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
+ (outs DPR:$dst), (ins SPR:$src),
+ IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
-def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0,
- (outs QPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
- [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> {
- let Inst{18-16} = 0b100;
-}
+def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
+ (outs QPR:$dst), (ins SPR:$src),
+ IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
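The ${src:lane} modifier asks the asm printer to render the S register as a lane of its enclosing D register (assumed printer behavior, shown only to make the operand syntax concrete):

    // With $src allocated to S1, the high half of D0, the emitted text
    // would look like:
    //   vdup.32 d2, d0[1]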
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
(INSERT_SUBREG QPR:$src,
@@ -2603,176 +2837,178 @@ def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
(DSubReg_f64_other_reg imm:$lane))>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i",
- int_arm_neon_vmovn>;
+defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+ "vmovn", "i", int_arm_neon_vmovn>;
// VQMOVN : Vector Saturating Narrowing Move
-defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s",
- int_arm_neon_vqmovns>;
-defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u",
- int_arm_neon_vqmovnu>;
-defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s",
- int_arm_neon_vqmovnsu>;
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>;
+defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
+ int_arm_neon_vmovls>;
+defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
+ int_arm_neon_vmovlu>;
// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
-def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v2i32, v2f32, fp_to_sint>;
-def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v2i32, v2f32, fp_to_uint>;
-def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v2f32, v2i32, sint_to_fp>;
-def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v2f32, v2i32, uint_to_fp>;
-def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v4i32, v4f32, fp_to_sint>;
-def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v4i32, v4f32, fp_to_uint>;
-def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v4f32, v4i32, sint_to_fp>;
-def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
-def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32",
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
-def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32",
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
-def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32",
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
-def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32",
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
-def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32",
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
-def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32",
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
-def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32",
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
-def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32",
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
-class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
(ins DPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
-class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
(ins QPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
-def VREV64d8 : VREV64D<0b00, "vrev64.8", v8i8>;
-def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
-def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
-def VREV64df : VREV64D<0b10, "vrev64.32", v2f32>;
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>;
-def VREV64q8 : VREV64Q<0b00, "vrev64.8", v16i8>;
-def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
-def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
-def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>;
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
// VREV32 : Vector Reverse elements within 32-bit words
-class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
(ins DPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
-class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
(ins QPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
-def VREV32d8 : VREV32D<0b00, "vrev32.8", v8i8>;
-def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
-def VREV32q8 : VREV32Q<0b00, "vrev32.8", v16i8>;
-def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
// VREV16 : Vector Reverse elements within 16-bit halfwords
-class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
(ins DPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
-class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
(ins QPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
-def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
-def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
// Other Vector Shuffles.
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, ValueType Ty>
- : N3VImm<0,1,0b11,0,0, (outs DPR:$dst),
- (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
- !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
- [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
- (Ty DPR:$rhs), imm:$index)))]>;
-
-class VEXTq<string OpcodeStr, ValueType Ty>
- : N3VImm<0,1,0b11,1,0, (outs QPR:$dst),
- (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
- !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
- [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
- (Ty QPR:$rhs), imm:$index)))]>;
-
-def VEXTd8 : VEXTd<"vext.8", v8i8>;
-def VEXTd16 : VEXTd<"vext.16", v4i16>;
-def VEXTd32 : VEXTd<"vext.32", v2i32>;
-def VEXTdf : VEXTd<"vext.32", v2f32>;
-
-def VEXTq8 : VEXTq<"vext.8", v16i8>;
-def VEXTq16 : VEXTq<"vext.16", v8i16>;
-def VEXTq32 : VEXTq<"vext.32", v4i32>;
-def VEXTqf : VEXTq<"vext.32", v4f32>;
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
+ (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
+ OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
+ (Ty DPR:$rhs), imm:$index)))]>;
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
+ (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
+ OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
+ (Ty QPR:$rhs), imm:$index)))]>;
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8>;
+def VEXTd16 : VEXTd<"vext", "16", v4i16>;
+def VEXTd32 : VEXTd<"vext", "32", v2i32>;
+def VEXTdf : VEXTd<"vext", "32", v2f32>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8>;
+def VEXTq16 : VEXTq<"vext", "16", v8i16>;
+def VEXTq32 : VEXTq<"vext", "32", v4i32>;
+def VEXTqf : VEXTq<"vext", "32", v4f32>;
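In the rewritten VEXT classes the four '?' bits of op11_8 should correspond to the imm4 extract index (an encoding assumption based on the VEXT instruction format):

    // "vext.8 d0, d1, d2, #3" resolves op11_8 = {?,?,?,?} to 0b0011.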
// VTRN : Vector Transpose
-def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
-def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
-def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
-def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
-def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
-def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
// VUZP : Vector Unzip (Deinterleave)
-def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
-def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
-def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
-def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
-def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
-def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
// VZIP : Vector Zip (Interleave)
-def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip.8">;
-def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
-def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
-def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
-def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
-def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// Vector Table Lookup and Table Extension.
@@ -2780,25 +3016,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$src), IIC_VTB1,
- "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
- "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
- "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
- "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1
@@ -2807,26 +3043,26 @@ def VTBL4
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
- "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
- "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
- "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
- "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1
@@ -2840,17 +3076,17 @@ def VTBX4
// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
+def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>;
def : N3VDsPat<fadd, VADDfd_sfp>;
// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
+def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>;
def : N3VDsPat<fsub, VSUBfd_sfp>;
// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
+def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>;
def : N3VDsPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
@@ -2858,17 +3094,17 @@ def : N3VDsPat<fmul, VMULfd_sfp>;
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>;
//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>;
//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs.f32",
+ IIC_VUNAD, "vabs", "f32",
v2f32, v2f32, int_arm_neon_vabs>;
def : N2VDIntsPat<fabs, VABSfd_sfp>;
@@ -2876,27 +3112,27 @@ def : N2VDIntsPat<fabs, VABSfd_sfp>;
let neverHasSideEffects = 1 in
def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vneg.f32\t$dst, $src", "", []>;
+ "vneg", "f32", "$dst, $src", "", []>;
def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v2i32, v2f32, fp_to_sint>;
def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v2i32, v2f32, fp_to_uint>;
def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v2f32, v2i32, sint_to_fp>;
def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v2f32, v2i32, uint_to_fp>;
def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index d1831d1..b5956a3 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -296,7 +296,7 @@ let isBranch = 1, isTerminator = 1 in {
// Load Store Instructions.
//
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
@@ -332,13 +332,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", ".n\t$dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ mayHaveSideEffects = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 1bb9bfd..9489815 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -49,8 +49,8 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word. t2_so_imm values are
// represented in the imm field in the same 12-bit form that they are encoded
-// into t2_so_imm instructions: the 8-bit immediate is the least significant bits
-// [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
+// into t2_so_imm instructions: the 8-bit immediate is the least significant
+// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
def t2_so_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
@@ -88,6 +88,21 @@ def t2_so_imm2part_2 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue());
+ }]> {
+}
+
+def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
def imm1_31 : PatLeaf<(i32 imm), [{
return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
@@ -252,9 +267,9 @@ multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
-/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-/// binary operation that produces a value and use and define the carry bit.
-/// It's not predicable.
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and use and define the carry
+/// bit. It's not predicable.
let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
@@ -471,7 +486,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
//
// Load
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
@@ -615,7 +630,7 @@ def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb),
AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
"str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
- (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
@@ -718,9 +733,9 @@ def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(t2UXTB16r_rot GPR:$Src, 8)>;
defm t2UXTAB : T2I_bin_rrot<"uxtab",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
defm t2UXTAH : T2I_bin_rrot<"uxtah",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
//===----------------------------------------------------------------------===//
@@ -1162,15 +1177,9 @@ def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS),
def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS),
(t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS),
- (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-
-// ConstantPool, GlobalAddress, and JumpTable
-def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>;
-def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
-def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
+ (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
+ (t2_so_neg_imm2part_2 imm:$RHS))>;
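
This replaces the dropped two-part subtract pattern: an add of a negative two-part immediate becomes two subtracts of the parts of its negation. The identity it relies on, checked with hand-picked parts (the real split comes from ARM_AM::getT2SOImmTwoPartFirst/Second):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t x = 0xCAFEBABE;
      uint32_t part1 = 0xFF000000, part2 = 0x000000FF; // hypothetical t2_so_imm parts
      uint32_t imm = -(part1 + part2);                 // the negative add immediate
      assert(x + imm == x - part1 - part2);            // add  ==  sub, sub
      return 0;
    }
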
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. Remove
@@ -1180,10 +1189,20 @@ def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>;
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
-let isReMaterializable = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index ba341f4..5bfe89d 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>,
// Load / store Instructions.
//
-let canFoldAsLoad = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
@@ -437,7 +437,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconstd", "\t$dst, $imm",
+ "vmov", ".f64\t$dst, $imm",
[(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
@@ -448,7 +448,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconsts", "\t$dst, $imm",
+ "vmov", ".f32\t$dst, $imm",
[(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 24990e6..aa50cfd 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -139,7 +139,8 @@ ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
JITCodeEmitter &JCE) {
- JCE.startGVStub(GV, 4, 4);
+ MachineCodeEmitter::BufferState BS;
+ JCE.startGVStub(BS, GV, 4, 4);
intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
if (!sys::Memory::setRangeWritable((void*)Addr, 4)) {
llvm_unreachable("ERROR: Unable to mark indirect symbol writable");
@@ -148,19 +149,27 @@ void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) {
llvm_unreachable("ERROR: Unable to mark indirect symbol executable");
}
- void *PtrAddr = JCE.finishGVStub(GV);
+ void *PtrAddr = JCE.finishGVStub(BS);
addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
return PtrAddr;
}
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
+
void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
+ void *Addr;
// If this is just a call to an external function, emit a branch instead of a
// call. The code is the same except for one bit of the last instruction.
if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
// Branch to the corresponding function addr.
if (IsPIC) {
- // The stub is 8-byte size and 4-aligned.
+      // The stub is 16 bytes in size and 4-byte aligned.
intptr_t LazyPtr = getIndirectSymAddr(Fn);
if (!LazyPtr) {
// In PIC mode, the function stub is loading a lazy-ptr.
@@ -172,30 +181,30 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
errs() << "JIT: Stub emitted at [" << LazyPtr
<< "] for external function at '" << Fn << "'\n");
}
- JCE.startGVStub(F, 16, 4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable((void*)Addr, 16)) {
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
- JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4]
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
- JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
- sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
- if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) {
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
} else {
      // The stub is 8 bytes in size and 4-byte aligned.
- JCE.startGVStub(F, 8, 4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable((void*)Addr, 8)) {
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
JCE.emitWordLE((intptr_t)Fn); // addr of function
- sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
- if (!sys::Memory::setRangeExecutable((void*)Addr, 8)) {
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
@@ -207,9 +216,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
//
// Branch and link to the compilation callback.
  // The stub is 16 bytes in size and 4-byte aligned.
- JCE.startGVStub(F, 16, 4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable((void*)Addr, 16)) {
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
// Save LR so the callback can determine which stub called it.
@@ -222,13 +231,13 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
// The address of the compilation callback.
JCE.emitWordLE((intptr_t)ARMCompilationCallback);
- sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
- if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) {
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
- return JCE.finishGVStub(F);
+ return Addr;
}
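
For reference, the 16-byte PIC stub assembled above can be written as a standalone sketch (a hypothetical raw-buffer emitter, not the JITCodeEmitter interface):

    #include <cstdint>
    #include <cstring>
    // Lays down the same four little-endian words emitFunctionStub produces.
    void writeARMPICStub(uint8_t *buf, intptr_t stubAddr, intptr_t lazyPtr) {
      const uint32_t words[4] = {
        0xe59fc004u,                               // ldr ip, [pc, #+4]
        0xe08fc00cu,                               // L_func$scv: add ip, pc, ip
        0xe59cf000u,                               // ldr pc, [ip]
        (uint32_t)(lazyPtr - (stubAddr + 4 + 8)),  // lazyPtr - (L_func$scv + 8)
      };
      std::memcpy(buf, words, sizeof(words));
    }
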
intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 7dfeed8..ff332b7 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -61,6 +61,10 @@ namespace llvm {
virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE);
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index 427645c..bbbf413 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -180,7 +180,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<4, [FU_NPipe], 0>,
- InstrStage<4, [FU_NLSPipe]>]>,
+ InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
//
// Single-precision FP Compare
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -189,17 +189,17 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<4, [FU_NPipe], 0>,
- InstrStage<4, [FU_NLSPipe]>]>,
+ InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<7, [FU_NPipe], 0>,
- InstrStage<7, [FU_NLSPipe]>]>,
+ InstrStage<7, [FU_NLSPipe]>], [7, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<5, [FU_NPipe], 0>,
- InstrStage<5, [FU_NLSPipe]>]>,
+ InstrStage<5, [FU_NLSPipe]>], [5, 1]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -208,7 +208,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<8, [FU_NPipe], 0>,
- InstrStage<8, [FU_NLSPipe]>]>,
+ InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -217,7 +217,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<8, [FU_NPipe], 0>,
- InstrStage<8, [FU_NLSPipe]>]>,
+ InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -226,7 +226,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<9, [FU_NPipe], 0>,
- InstrStage<9, [FU_NLSPipe]>]>,
+ InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -235,7 +235,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<11, [FU_NPipe], 0>,
- InstrStage<11, [FU_NLSPipe]>]>,
+ InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -244,27 +244,27 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<19, [FU_NPipe], 0>,
- InstrStage<19, [FU_NLSPipe]>]>,
+ InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<20, [FU_NPipe], 0>,
- InstrStage<20, [FU_NLSPipe]>]>,
+ InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<29, [FU_NPipe], 0>,
- InstrStage<29, [FU_NLSPipe]>]>,
+ InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<19, [FU_NPipe], 0>,
- InstrStage<19, [FU_NLSPipe]>]>,
+ InstrStage<19, [FU_NLSPipe]>], [19, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<29, [FU_NPipe], 0>,
- InstrStage<29, [FU_NLSPipe]>]>,
+ InstrStage<29, [FU_NLSPipe]>], [29, 1]>,
//
// Single-precision FP Load
// use FU_Issue to enforce the 1 load/store per cycle limit
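
The bracketed lists added throughout this hunk ([4, 1], [19, 2, 1, 1], ...) are per-operand cycle counts: the first entry is the cycle at which the result becomes available, the remaining entries are when each source operand is read. A simplified sketch of how a scheduler turns them into a def-to-use latency (the real lookup lives in InstrItineraryData):

    #include <vector>
    struct Itinerary { std::vector<unsigned> OperandCycles; };
    // Latency from the producer's operand DefIdx to the consumer's UseIdx.
    int operandLatency(const Itinerary &Prod, unsigned DefIdx,
                       const Itinerary &Cons, unsigned UseIdx) {
      if (DefIdx >= Prod.OperandCycles.size() ||
          UseIdx >= Cons.OperandCycles.size())
        return 1;  // no data: fall back to a default latency
      int L = (int)Prod.OperandCycles[DefIdx] - (int)Cons.OperandCycles[UseIdx];
      return L > 0 ? L : 1;
    }
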
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 432ed78..71f3883 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,10 @@ UseNEONFP("arm-use-neon-fp",
cl::desc("Use NEON for single-precision FP"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+UseMOVT("arm-use-movt",
+ cl::init(true), cl::Hidden);
+
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
: ARMArchVersion(V4T)
@@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ThumbMode(Thumb1)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
+ , UseMovt(UseMOVT)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
@@ -109,8 +114,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (UseNEONFP.getPosition() == 0)
UseNEONForSinglePrecisionFP = true;
}
- HasBranchTargetBuffer = (CPUString == "cortex-a8" ||
- CPUString == "cortex-a9");
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 3d0e01e..3f06b7b 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -50,9 +50,6 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
- /// HasBranchTargetBuffer - True if processor can predict indirect branches.
- bool HasBranchTargetBuffer;
-
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -65,6 +62,10 @@ protected:
  /// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
  /// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -126,12 +127,12 @@ protected:
bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
bool hasThumb2() const { return ThumbMode >= Thumb2; }
- bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; }
-
bool isR9Reserved() const { return IsR9Reserved; }
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+
const std::string & getCPUString() const { return CPUString; }
-
+
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtarget::AntiDepBreakMode& Mode,
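
The new useMovt() hook gates the movw/movt materialization used elsewhere in this patch. What the pair computes, as a minimal self-checking sketch:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t imm = 0xDEADBEEF;                // arbitrary 32-bit immediate
      uint32_t r = imm & 0xFFFFu;               // movw r, #:lower16:imm (zeroes the top half)
      r = (r & 0xFFFFu) | (imm & 0xFFFF0000u);  // movt r, #:upper16:imm (keeps the bottom half)
      assert(r == imm);
      return 0;
    }
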
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index dd4a240..692bb19 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -330,6 +330,8 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
const char *Modifier) {
const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
switch (MO.getType()) {
default:
assert(0 && "<unknown operand type>");
@@ -356,12 +358,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_Immediate: {
int64_t Imm = MO.getImm();
O << '#';
- if (Modifier) {
- if (strcmp(Modifier, "lo16") == 0)
- O << ":lower16:";
- else if (strcmp(Modifier, "hi16") == 0)
- O << ":upper16:";
- }
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
O << Imm;
break;
}
@@ -371,6 +373,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
GlobalValue *GV = MO.getGlobal();
+
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
O << Mang->getMangledName(GV);
printOffset(MO.getOffset());
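
A compact model of the prefix selection the printer now performs for both immediates and global addresses (flag values assumed for illustration; the real ones are ARMII::MO_LO16 / MO_HI16):

    #include <cstring>
    enum { MO_LO16 = 1, MO_HI16 = 2 };  // hypothetical encodings
    const char *relocPrefix(unsigned TF, const char *Modifier) {
      if ((Modifier && std::strcmp(Modifier, "lo16") == 0) || (TF & MO_LO16))
        return ":lower16:";
      if ((Modifier && std::strcmp(Modifier, "hi16") == 0) || (TF & MO_HI16))
        return ":upper16:";
      return "";
    }
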
@@ -998,7 +1007,7 @@ void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) {
void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) {
const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
- O << '#' << ARM::getVFPf32Imm(FP->getValueAPF());
+ O << '#' << FP->getValueAPF().convertToFloat();
if (VerboseAsm) {
O.PadToColumn(MAI->getCommentColumn());
O << MAI->getCommentString() << ' ';
@@ -1008,7 +1017,7 @@ void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) {
void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum) {
const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
- O << '#' << ARM::getVFPf64Imm(FP->getValueAPF());
+ O << '#' << FP->getValueAPF().convertToDouble();
if (VerboseAsm) {
O.PadToColumn(MAI->getCommentColumn());
O << MAI->getCommentString() << ' ';
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 7d767ec..50abcf4 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -81,8 +81,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
// afterwards
// - The imp-defs / imp-uses are superregs only, we don't care about
// them.
- BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
+ AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg));
MBB.erase(MI);
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ad1739c..b2fd7b3 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -78,7 +78,7 @@ namespace {
{ ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
{ ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
{ ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 },
// FIXME: Do we need the 16-bit 'S' variant?
{ ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
{ ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
@@ -413,6 +413,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (MI->getOperand(2).getImm() == 0)
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
break;
+ case ARM::t2MOVi16:
+    // Can convert only 'pure' immediate operands, not immediates derived
+    // from globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ break;
}
return false;
}
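
The new t2MOVi16 case narrows only when the operand is a plain constant; a :lower16:/:upper16: fragment of a global address has no value to range-check at this stage. The encoding constraint, as a sketch:

    #include <cstdint>
    // tMOVi8 encodes an 8-bit immediate, t2MOVi16 a 16-bit one, so shrinking
    // is sound only for a known constant that fits the smaller field.
    bool canNarrowMOVi16(bool isPureImm, int64_t imm) {
      return isPureImm && imm >= 0 && imm <= 0xFF;
    }
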
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 9217522..b5579f4 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -127,10 +127,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Not implemented yet.
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index d328135..b3b711e 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -190,17 +190,27 @@ extern "C" {
#endif
}
+TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() {
+ // The stub contains 19 4-byte instructions, aligned at 4 bytes:
+ // R0 = R27
+ // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target" == 16 instructions
+ // JMP R27
+ // Magic number so the compilation callback can recognize the stub.
+ StubLayout Result = {19 * 4, 4};
+ return Result;
+}
+
void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
//assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
//Do things in a stupid slow way!
- JCE.startGVStub(F, 19*4);
void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
for (int x = 0; x < 19; ++ x)
JCE.emitWordLE(0);
EmitBranchToAt(Addr, Fn);
DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n");
- return JCE.finishGVStub(F);
+ return Addr;
}
TargetJITInfo::LazyResolverFn
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
index ecb467f..bd358a4 100644
--- a/lib/Target/Alpha/AlphaJITInfo.h
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -31,6 +31,7 @@ namespace llvm {
explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
{ useGOT = true; }
+ virtual StubLayout getStubLayout();
virtual void *emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE);
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index c5c96f8..ad2510a 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -114,10 +114,6 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
// READCYCLECOUNTER needs special type legalization.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Use the default implementation.
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index 642d10f..d396cc8 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -44,7 +44,7 @@ class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
let Num = num;
}
-// Ywo halves of 32-bit register
+// Two halves of 32-bit register
multiclass Rss<bits<3> group, bits<3> num, string n> {
def H : Rs<group, num, 1, !strconcat(n, ".h")>;
def L : Rs<group, num, 0, !strconcat(n, ".l")>;
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 4dd82a6..23e192e 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -387,10 +387,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index d3b575a..f24ffd2 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -31,14 +31,6 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
}
//===----------------------------------------------------------------------===//
-// DWARF debugging Pseudo Instructions
-//===----------------------------------------------------------------------===//
-
-def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
- ".loc $file, $line, $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// Loads:
// NB: The ordering is actually important, since the instruction selection
// will try each of the instructions in sequence, i.e., the D-form first with
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index c0084be..beccb2c 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -312,8 +312,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N,
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
else if (AM.BlockAddr)
- Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
- true /*AM.SymbolFlags*/);
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, 0/*AM.SymbolFlags*/);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5a925f5..29cc370 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -162,7 +162,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const {
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+ return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 2;
}
//===----------------------------------------------------------------------===//
@@ -594,9 +594,17 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ:
TCC = MSP430CC::COND_E; // aka COND_Z
+      // Minor optimization: if RHS is a constant, swap the operands so the
+      // constant can be folded into the comparison.
+ if (RHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
break;
case ISD::SETNE:
TCC = MSP430CC::COND_NE; // aka COND_NZ
+      // Minor optimization: if RHS is a constant, swap the operands so the
+      // constant can be folded into the comparison.
+ if (RHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
break;
case ISD::SETULE:
std::swap(LHS, RHS); // FALLTHROUGH
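
The swap is restricted to SETEQ/SETNE because equality is symmetric: reversing the operands leaves the result unchanged while moving the constant into the position the cmp instruction can encode directly. A trivial check of the symmetry:

    #include <cassert>
    int main() {
      int lhs = 7, rhs = 42;                 // rhs stands in for the ISD::Constant
      assert((lhs == rhs) == (rhs == lhs));  // SETEQ is symmetric
      assert((lhs != rhs) == (rhs != lhs));  // SETNE is symmetric
      return 0;
    }
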
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index c3bbfe8..7a26f6c 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -823,37 +823,6 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
"cmp.w\t{$src1, $src2}",
[(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
-def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #0}",
- [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>;
-def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #0}",
- [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>;
-def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #1}",
- [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>;
-def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #1}",
- [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>;
-def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #2}",
- [(MSP430cmp (load addr:$src1), (i8 2)), (implicit SRW)]>;
-def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #2}",
- [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>;
-def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #4}",
- [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>;
-def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #4}",
- [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>;
-def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #8}",
- [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>;
-def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #8}",
- [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>;
-
} // Defs = [SRW]
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
index 4e3a8d0..516eacb 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -19,6 +19,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
WeakRefDirective ="\t.weak\t";
SetDirective = "\t.set\t";
PCSymbol=".";
+ CommentString = ";";
AlignmentIsInBytes = false;
AllowNameToStartWithDigit = true;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 2990ba9..ede111d 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -144,6 +144,7 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
// on PIC code Load GA
if (TM.getRelocationModel() == Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetConstantPool) ||
(Addr.getOpcode() == ISD::TargetJumpTable)){
Base = CurDAG->getRegister(Mips::GP, MVT::i32);
Offset = Addr;
@@ -174,23 +175,21 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
}
// When loading from constant pools, load the lower address part in
- // the instruction itself. Instead of:
+  // the instruction itself. For example, instead of:
// lui $2, %hi($CPI1_0)
// addiu $2, $2, %lo($CPI1_0)
// lwc1 $f0, 0($2)
// Generate:
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(0).getOpcode() == MipsISD::Hi &&
+ if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
+ Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
SDValue LoVal = Addr.getOperand(1);
- if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(
- LoVal.getOperand(0))) {
- if (!CP->getOffset()) {
- Base = Addr.getOperand(0);
- Offset = LoVal.getOperand(0);
- return true;
- }
+ if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
}
}
}
@@ -235,6 +234,10 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) {
else
return NULL;
+  // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
// Instead of:
// ldc $f0, X($3)
// Generate:
@@ -296,6 +299,10 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) {
else
return NULL;
+  // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
// Instead of:
// sdc $f0, X($3)
// Generate:
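
Both endianness fixes above decide which 4-byte half of a 64-bit FP value goes at offset 0 and which at offset 4. A host-side sketch of why the swap is needed (assuming a little-endian host for the memcpy; names illustrative):

    #include <cstdint>
    #include <cstring>
    // Splits a double into the two 32-bit words moved by the paired lwc1/swc1.
    void splitF64(double d, uint32_t &atOffset0, uint32_t &atOffset4, bool bigEndian) {
      uint32_t w[2];
      std::memcpy(w, &d, sizeof d);        // w[0] is the low word on this host
      // A big-endian target keeps the high word at the lower address, so the
      // two halves trade stack offsets relative to the little-endian layout.
      atOffset0 = bigEndian ? w[1] : w[0];
      atOffset4 = bigEndian ? w[0] : w[1];
    }
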
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c9a43b4..ced8b93 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -132,10 +132,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::f32, Expand);
setOperationAction(ISD::FEXP, MVT::f32, Expand);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Use the default for now
@@ -567,8 +563,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
SDValue ResNode;
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
Constant *C = N->getConstVal();
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_HILO);
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -581,11 +575,21 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
// SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
- //} else { // %hi/%lo relocation
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_HILO);
SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
- //}
+ } else {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_GOT);
+ SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ CP, NULL, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ }
return ResNode;
}
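
In the non-PIC branch the address is rebuilt from a %hi/%lo pair; because %lo is sign-extended by the instruction that consumes it, %hi must be carry-adjusted. The arithmetic, checked in isolation:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t addr = 0x1234ABCD;              // hypothetical constant-pool address
      uint32_t hi = (addr + 0x8000u) >> 16;    // %hi(addr), carry-adjusted for lui
      int16_t  lo = (int16_t)(addr & 0xFFFFu); // %lo(addr), sign-extended at use
      assert(((hi << 16) + lo) == addr);
      return 0;
    }
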
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index af64c9f..6d8e160 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -200,22 +200,33 @@ void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC) const {
- unsigned Opc;
-
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
if (RC == Mips::CPURegsRegisterClass)
- Opc = Mips::SW;
+ BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
else if (RC == Mips::FGR32RegisterClass)
- Opc = Mips::SWC1;
- else {
- assert(RC == Mips::AFGR64RegisterClass);
- Opc = Mips::SDC1;
- }
-
- BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
.addImm(0).addFrameIndex(FI);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::SDC1))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[0], getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[1], getKillRegState(isKill))
+ .addImm(4).addFrameIndex(FI);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
}
void MipsInstrInfo::
@@ -223,19 +234,27 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC) const
{
- unsigned Opc;
- if (RC == Mips::CPURegsRegisterClass)
- Opc = Mips::LW;
- else if (RC == Mips::FGR32RegisterClass)
- Opc = Mips::LWC1;
- else {
- assert(RC == Mips::AFGR64RegisterClass);
- Opc = Mips::LDC1;
- }
-
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI);
+
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
+ else if (RC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(DestReg);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
+ .addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
+ .addImm(4).addFrameIndex(FI);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
}
MachineInstr *MipsInstrInfo::
@@ -278,11 +297,14 @@ foldMemoryOperandImpl(MachineFunction &MF,
const TargetRegisterClass
*RC = RI.getRegClass(MI->getOperand(0).getReg());
unsigned StoreOpc, LoadOpc;
+ bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1();
if (RC == Mips::FGR32RegisterClass) {
LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
} else {
assert(RC == Mips::AFGR64RegisterClass);
+ // Mips1 doesn't have ldc/sdc instructions.
+ if (IsMips1) break;
LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
}
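
On MIPS1, which lacks ldc1/sdc1, the AFGR64 spills and reloads above go through the register's two 32-bit subregisters at offsets 0 and 4. The data movement, modeled with plain structs (names hypothetical):

    #include <cstdint>
    struct FrameSlot { uint32_t word[2]; };  // an 8-byte spill slot
    struct AFGR64    { uint32_t sub[2]; };   // pair of FGR32 subregisters
    void reloadPair(AFGR64 &reg, const FrameSlot &slot) {
      reg.sub[0] = slot.word[0];             // lwc1 <sub0>, 0(fi)
      reg.sub[1] = slot.word[1];             // lwc1 <sub1>, 4(fi)
    }
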
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index ad326db..cae4181 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -107,8 +107,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const
static const unsigned BitMode32CalleeSavedRegs[] = {
Mips::S0, Mips::S1, Mips::S2, Mips::S3,
Mips::S4, Mips::S5, Mips::S6, Mips::S7,
- Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
- Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15,0
+ Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0
};
if (Subtarget.isSingleFloat())
@@ -136,9 +135,7 @@ MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
&Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
&Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
&Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass,
- &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
};
if (Subtarget.isSingleFloat())
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 0ed44d2..6e0e3ce 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -306,10 +306,9 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
int ElementAux[PIC16Dbg::AuxSize] = { 0 };
std::string TagName = "";
DIDerivedType DITy(Element.getNode());
- const char *ElementName = DITy.getName();
unsigned short ElementSize = DITy.getSizeInBits()/8;
    // Get the mangled name for this structure/union element.
- std::string MangMemName = ElementName + SuffixNo;
+ std::string MangMemName = DITy.getName().str() + SuffixNo;
PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
short Class = 0;
if( CTy.getTag() == dwarf::DW_TAG_union_type)
@@ -337,12 +336,11 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
continue;
if (CTy.getTag() == dwarf::DW_TAG_union_type ||
CTy.getTag() == dwarf::DW_TAG_structure_type ) {
- const char *Name = CTy.getName();
// Get the number after llvm.dbg.composite and make UniqueSuffix from
// it.
std::string DIVar = CTy.getNode()->getNameStr();
std::string UniqueSuffix = "." + DIVar.substr(18);
- std::string MangledCTyName = Name + UniqueSuffix;
+ std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
int Aux[PIC16Dbg::AuxSize] = {0};
// 7th and 8th byte represent size of structure/union.
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
index 65f113e..73d30bf 100644
--- a/lib/Target/PowerPC/PPCFrameInfo.h
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -42,11 +42,12 @@ public:
/// frame pointer.
static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
// For the Darwin ABI:
- // Use the TOC save slot in the PowerPC linkage area for saving the frame
- // pointer (if needed.) LLVM does not generate code that uses the TOC (R2
- // is treated as a caller saved register.)
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
if (isDarwinABI)
- return isPPC64 ? 40 : 20;
+ return isPPC64 ? -8U : -4U;
// SVR4 ABI: First slot in the general register save area.
return -4U;
@@ -90,6 +91,17 @@ public:
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (TM.getSubtarget<PPCSubtarget>().isDarwinABI()) {
+ NumEntries = 1;
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
// Early exit if not using the SVR4 ABI.
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
NumEntries = 0;
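
The Darwin path now reports a single fixed spill slot for the frame pointer (R31 at -4, or X31 at -8 on ppc64). The shape of that lookup, sketched with an assumed SpillSlot type and placeholder register numbers:

    struct SpillSlot { unsigned Reg; int Offset; };
    const SpillSlot *darwinSpillSlots(bool isPPC64, unsigned &NumEntries) {
      static const SpillSlot d64 = { /*PPC::X31*/ 63, -8 };
      static const SpillSlot d32 = { /*PPC::R31*/ 31, -4 };
      NumEntries = 1;
      return isPPC64 ? &d64 : &d32;
    }
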
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index fb9a240..e7334b5 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -86,7 +86,7 @@ namespace {
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
- static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME);
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
@@ -358,7 +358,7 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
- bool IsShiftMask, unsigned &SH,
+ bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
// Don't even go down this path for i64, since different logic will be
// necessary for rldicl/rldicr/rldimi.
@@ -374,12 +374,12 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
if (Opcode == ISD::SHL) {
// apply shift left to mask if it comes first
- if (IsShiftMask) Mask = Mask << Shift;
+ if (isShiftMask) Mask = Mask << Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu << Shift);
} else if (Opcode == ISD::SRL) {
// apply shift right to mask if it comes first
- if (IsShiftMask) Mask = Mask >> Shift;
+ if (isShiftMask) Mask = Mask >> Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu >> Shift);
// adjust for the left rotate
@@ -443,8 +443,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
unsigned MB, ME;
if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
- SDValue Tmp1, Tmp2, Tmp3;
- bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
+ SDValue Tmp1, Tmp2;
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(1), Value)) {
@@ -461,10 +460,9 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
Op1 = Op1.getOperand(0);
}
}
-
- Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0;
+
SH &= 31;
- SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB),
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
getI32Imm(ME) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 099fcb5..30a7861 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -182,10 +182,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
@@ -1174,7 +1170,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue TgtBA = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true);
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero);
@@ -2177,10 +2173,10 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tail call.
-static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
unsigned ParamSize) {
- if (!IsTailCall) return 0;
+ if (!isTailCall) return 0;
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
unsigned CallerMinReservedArea = FI->getMinReservedArea();
@@ -3190,8 +3186,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Construct the stack pointer operand.
- bool IsPPC64 = Subtarget.isPPC64();
- unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
SDValue StackPtr = DAG.getRegister(SP, PtrVT);
// Get the operands for the STACKRESTORE.
@@ -3213,7 +3209,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3225,9 +3221,9 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!RASI) {
    // Find out the fixed offset of the frame pointer save area.
- int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset,
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
true, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
@@ -3238,7 +3234,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3250,11 +3246,11 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
- int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f5c095a..2b3f80d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1358,15 +1358,6 @@ def RLWNM : MForm_2<23,
//===----------------------------------------------------------------------===//
-// DWARF Pseudo Instructions
-//
-
-def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
- "${:comment} .loc $file, $line, $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
- (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// PowerPC Instruction Patterns
//
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index ef25d92..c679bcd 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -323,6 +323,15 @@ PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
}
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
defined(__APPLE__)
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
@@ -330,12 +339,12 @@ extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
// If this is just a call to an external function, emit a branch instead of a
// call. The code is the same except for one bit of the last instruction.
if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
Fn != (void*)(intptr_t)PPC64CompilationCallback) {
- JCE.startGVStub(F, 7*4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ void *Addr = (void*)JCE.getCurrentPCValue();
JCE.emitWordBE(0);
JCE.emitWordBE(0);
JCE.emitWordBE(0);
@@ -343,13 +352,12 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0);
JCE.emitWordBE(0);
JCE.emitWordBE(0);
- EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit);
- sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4);
- return JCE.finishGVStub(F);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
}
- JCE.startGVStub(F, 10*4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ void *Addr = (void*)JCE.getCurrentPCValue();
if (is64Bit) {
JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
@@ -372,8 +380,8 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0);
JCE.emitWordBE(0);
EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
- sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4);
- return JCE.finishGVStub(F);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2e25b29..47ead59 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -30,6 +30,7 @@ namespace llvm {
is64Bit = tmIs64Bit;
}
+ virtual StubLayout getStubLayout();
virtual void *emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE);
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e65e644..0c3c8eb 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1032,18 +1032,17 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
- bool IsPPC64 = Subtarget.isPPC64();
- bool IsSVR4ABI = Subtarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
- if (!FPSI && needsFP(MF) && IsSVR4ABI) {
+ if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
- int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
@@ -1067,7 +1066,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
@@ -1297,7 +1296,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
int NegFrameSize = -FrameSize;
// Get processor type.
- bool IsPPC64 = Subtarget.isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// Get operating system
bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) must be saved.
@@ -1306,7 +1305,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF) && FrameSize;
- int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
@@ -1316,11 +1315,11 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+ FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
- if (IsPPC64) {
+ if (isPPC64) {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
@@ -1361,7 +1360,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
- if (!IsPPC64) {
+ if (!isPPC64) {
// PPC32.
if (ALIGN_STACK && MaxAlign > TargetAlign) {
assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
@@ -1444,19 +1443,19 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
} else {
- MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31);
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
Moves.push_back(MachineMove(FrameLabelId, SP, SP));
}
if (HasFP) {
MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
- MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
}
if (MustSaveLR) {
MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
- MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
}
}
@@ -1465,7 +1464,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
- if (!IsPPC64) {
+ if (!isPPC64) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
.addReg(PPC::R1)
.addReg(PPC::R1);
@@ -1481,8 +1480,8 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Mark effective beginning of when frame pointer is ready.
BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
- MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
- (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
@@ -1528,7 +1527,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
int FrameSize = MFI->getStackSize();
// Get processor type.
- bool IsPPC64 = Subtarget.isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// Get operating system
bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) has been saved.
@@ -1537,7 +1536,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF) && FrameSize;
- int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
@@ -1547,7 +1546,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+ FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
@@ -1575,7 +1574,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
if (FrameSize) {
// The loaded (or persistent) stack pointer value is offset by the 'stwu'
// on entry to the function. Add this offset back now.
- if (!IsPPC64) {
+ if (!isPPC64) {
// If this function contained a fastcc call and PerformTailCallOpt is
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
@@ -1629,7 +1628,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
}
}
- if (IsPPC64) {
+ if (isPPC64) {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
.addImm(LROffset/4).addReg(PPC::X1);
@@ -1659,13 +1658,13 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
- unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1;
- unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31;
- unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI;
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
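
Throughout the prologue/epilogue code above, the single isPPC64 flag picks between the 32-bit (R*, 4-byte slots) and 64-bit (X*, 8-byte slots) variants of registers, opcodes, and save areas. A standalone model of that selection pattern; the enum values are placeholders, not PPC register encodings:

  #include <cstdio>

  enum Reg { R1, R31, X1, X31 };  // placeholders only

  static Reg stackReg(bool isPPC64)        { return isPPC64 ? X1 : R1; }
  static Reg frameReg(bool isPPC64)        { return isPPC64 ? X31 : R31; }
  static int fpSaveSlotBytes(bool isPPC64) { return isPPC64 ? 8 : 4; }

  int main() {
    std::printf("ppc64: SP=%d FP=%d slot=%d\n",
                stackReg(true), frameReg(true), fpSaveSlotBytes(true));
    return 0;
  }
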
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index aad621f..2d8a687 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,6 +2,29 @@ Target Independent Opportunities:
//===---------------------------------------------------------------------===//
+Dead argument elimination should be enhanced to handle cases when an argument is
+dead to an externally visible function. Though the argument can't be removed
+from the externally visible function, the caller doesn't need to pass it in.
+For example in this testcase:
+
+ void foo(int X) __attribute__((noinline));
+ void foo(int X) { sideeffect(); }
+ void bar(int A) { foo(A+1); }
+
+We compile bar to:
+
+define void @bar(i32 %A) nounwind ssp {
+ %0 = add nsw i32 %A, 1 ; <i32> [#uses=1]
+ tail call void @foo(i32 %0) nounwind noinline ssp
+ ret void
+}
+
+The add is dead; we could pass in 'i32 undef' instead. This occurs for C++
+templates etc., which usually have linkonce_odr/weak_odr linkage, not internal
+linkage.
+
+//===---------------------------------------------------------------------===//
+
With the recent changes to make the implicit def/use set explicit in
machineinstrs, we should change the target descriptions for 'call' instructions
so that the .td files don't list all the call-clobbered registers as implicit
@@ -220,7 +243,7 @@ so cool to turn it into something like:
... which would only do one 32-bit XOR per loop iteration instead of two.
It would also be nice to recognize the reg->size doesn't alias reg->node[i], but
-alas.
+this requires TBAA.
//===---------------------------------------------------------------------===//
@@ -280,6 +303,9 @@ unsigned int popcount(unsigned int input) {
return count;
}
+This is a form of idiom recognition for loops, the same thing that could be
+useful for recognizing memset/memcpy.
+
//===---------------------------------------------------------------------===//
These should turn into single 16-bit (unaligned?) loads on little/big endian
@@ -343,7 +369,7 @@ PHI Slicing could be extended to do this.
//===---------------------------------------------------------------------===//
-LSR should know what GPR types a target has. This code:
+LSR should know what GPR types a target has from TargetData. This code:
volatile short X, Y; // globals
@@ -369,7 +395,6 @@ LBB1_2:
LSR should reuse the "+" IV for the exit test.
-
//===---------------------------------------------------------------------===//
Tail call elim should be more aggressive, checking to see if the call is
@@ -441,25 +466,6 @@ entry:
//===---------------------------------------------------------------------===//
-"basicaa" should know how to look through "or" instructions that act like add
-instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and
-basicaa can't analyze the array subscript, leading to duplicated loads in the
-generated code:
-
-void test(int X, int Y, int a[]) {
-int i;
- for (i=2; i<1000; i+=4) {
- a[i+0] = a[i-1+0]*a[i-2+0];
- a[i+1] = a[i-1+1]*a[i-2+1];
- a[i+2] = a[i-1+2]*a[i-2+2];
- a[i+3] = a[i-1+3]*a[i-2+3];
- }
-}
-
-BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i].
-
-//===---------------------------------------------------------------------===//
-
We should investigate an instruction sinking pass. Consider this silly
example in pic mode:
@@ -1110,6 +1116,8 @@ later.
//===---------------------------------------------------------------------===//
+[STORE SINKING]
+
Store sinking: This code:
void f (int n, int *cond, int *res) {
@@ -1165,6 +1173,8 @@ This is GCC PR38204.
//===---------------------------------------------------------------------===//
+[STORE SINKING]
+
GCC PR37810 is an interesting case where we should sink load/store reload
into the if block and outside the loop, so we don't reload/store it on the
non-call path.
@@ -1192,7 +1202,7 @@ we don't sink the store. We need partially dead store sinking.
//===---------------------------------------------------------------------===//
-[PHI TRANSLATE GEPs]
+[LOAD PRE CRIT EDGE SPLITTING]
GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack
leading to excess stack traffic. This could be handled by GVN with some crazy
@@ -1209,99 +1219,59 @@ bb3: ; preds = %bb1, %bb2, %bb
%10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0
%11 = load i32* %10, align 4
-%11 is fully redundant, an in BB2 it should have the value %8.
+%11 is partially redundant, and in BB2 it should have the value %8.
+
+GCC PR33344 and PR35287 are similar cases.
-GCC PR33344 is a similar case.
//===---------------------------------------------------------------------===//
-[PHI TRANSLATE INDEXED GEPs] PR5313
+[LOAD PRE]
-Load redundancy elimination for simple loop. This loop:
+There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
+GCC testsuite; the ones we don't get yet (checked through loadpre25) are:
-void append_text(const char* text,unsigned char * const io) {
- while(*text)
- *io=*text++;
-}
+[CRIT EDGE BREAKING]
+loadpre3.c predcom-4.c
-Compiles to have a fully redundant load in the loop (%2):
+[PRE OF READONLY CALL]
+loadpre5.c
-define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
-entry:
- %0 = load i8* %text, align 1 ; <i8> [#uses=1]
- %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1]
- br i1 %1, label %return, label %bb
-
-bb: ; preds = %bb, %entry
- %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2]
- %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1]
- %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1]
- store i8 %2, i8* %io, align 1
- %tmp = add i32 %indvar, 1 ; <i32> [#uses=2]
- %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1]
- %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1]
- %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1]
- br i1 %4, label %return, label %bb
-
-return: ; preds = %bb, %entry
- ret void
-}
+[TURN SELECT INTO BRANCH]
+loadpre14.c loadpre15.c
-//===---------------------------------------------------------------------===//
+actually a conditional increment: loadpre18.c loadpre19.c
-There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
-GCC testsuite. There are many pre testcases as ssa-pre-*.c
//===---------------------------------------------------------------------===//
-There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
-GCC testsuite. For example, predcom-1.c is:
-
- for (i = 2; i < 1000; i++)
- fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff;
-
-which compiles into:
-
-bb1: ; preds = %bb1, %bb1.thread
- %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ]
- %i.0.reg2mem.0 = add i32 %indvar, 2
- %0 = add i32 %indvar, 1 ; <i32> [#uses=3]
- %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0
- %2 = load i32* %1, align 4 ; <i32> [#uses=1]
- %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar
- %4 = load i32* %3, align 4 ; <i32> [#uses=1]
- %5 = add i32 %4, %2 ; <i32> [#uses=1]
- %6 = and i32 %5, 65535 ; <i32> [#uses=1]
- %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0
- store i32 %6, i32* %7, align 4
- %exitcond = icmp eq i32 %0, 998 ; <i1> [#uses=1]
- br i1 %exitcond, label %return, label %bb1
+[SCALAR PRE]
+There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the
+GCC testsuite.
-This is basically:
- LOAD fib[i+1]
- LOAD fib[i]
- STORE fib[i+2]
+//===---------------------------------------------------------------------===//
-instead of handling this as a loop or other xform, all we'd need to do is teach
-load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) =
-(i'+2) (where i' is the previous iteration of i). This would find the store
-which feeds it.
+There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
+GCC testsuite. For example, we get the first example in predcom-1.c, but
+miss the second one:
-predcom-2.c is apparently the same as predcom-1.c
-predcom-3.c is very similar but needs loads feeding each other instead of
-store->load.
-predcom-4.c seems the same as the rest.
+unsigned fib[1000];
+unsigned avg[1000];
+__attribute__ ((noinline))
+void count_averages(int n) {
+ int i;
+ for (i = 1; i < n; i++)
+ avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff;
+}
-//===---------------------------------------------------------------------===//
+which compiles into two loads instead of one in the loop.
-Other simple load PRE cases:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]
+predcom-2.c is the same as predcom-1.c
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)
- llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis
+predcom-3.c is very similar but needs loads feeding each other instead of
+store->load.
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
//===---------------------------------------------------------------------===//
@@ -1334,7 +1304,7 @@ Interesting missed case because of control flow flattening (should be 2 loads):
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as |
opt -mem2reg -gvn -instcombine | llvm-dis
-we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT
+we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT
VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS
//===---------------------------------------------------------------------===//
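
For the count_averages loop above, the form predictive commoning / load PRE should reach can be written by hand: carry fib[i-1] and fib[i] across iterations in scalars so each iteration issues one new load instead of three. A sketch using the testcase's own globals (guard for n < 2 omitted for brevity):

  extern unsigned fib[1000], avg[1000];

  void count_averages_pre(int n) {         // assumes n >= 2
    unsigned prev = fib[0], cur = fib[1];  // loads hoisted out of the loop
    for (int i = 1; i < n; i++) {
      unsigned next = fib[i + 1];          // the only load per iteration
      avg[i] = (((unsigned long)prev + cur + next) / 3) & 0xffff;
      prev = cur;                          // rotate instead of reloading
      cur = next;
    }
  }
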
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 133f828..1b3ca3e 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -644,10 +644,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
@@ -663,8 +659,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
// No debug info support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
setStackPointerRegisterToSaveRestore(SP::O6);
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 6fdbc92..f887523 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -783,8 +783,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
}
// Exception Handling.
- LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
- SectionKind::getReadOnlyWithRel());
+ LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
+ SectionKind::getDataRel());
EHFrameSection =
getMachOSection("__TEXT", "__eh_frame",
MCSectionMachO::S_COALESCED |
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index be9f4b2..38c0c28 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -43,7 +43,6 @@ MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
Twine(AsmPrinter.getFunctionNumber())+"$pb");
}
-
/// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an
/// MCOperand.
MCSymbol *X86MCInstLower::
@@ -231,6 +230,19 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
return Ctx.GetOrCreateSymbol(Name.str());
}
+MCSymbol *X86MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ const char *Suffix = "";
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on BA operand");
+ case X86II::MO_NO_FLAG: break; // No flag.
+ case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name.
+ case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break;
+ }
+
+ return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix);
+}
+
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
@@ -331,8 +343,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(
- MO.getBlockAddress()));
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
break;
}
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
index fa25b90..94f8bfc 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -43,6 +43,7 @@ public:
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
private:
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..b329e89
--- /dev/null
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Disassembler
+ X86Disassembler.cpp
+ )
+add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile
new file mode 100644
index 0000000..b289647
--- /dev/null
+++ b/lib/Target/X86/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/X86/Disassembler/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Disassembler
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 0000000..2ebbc9b
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,29 @@
+//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "X86.h"
+using namespace llvm;
+
+static const MCDisassembler *createX86_32Disassembler(const Target &T) {
+ return 0;
+}
+
+static const MCDisassembler *createX86_64Disassembler(const Target &T) {
+ return 0;
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ createX86_32Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ createX86_64Disassembler);
+}
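
The two factories above are registered with TargetRegistry so generic tools can obtain a disassembler by target without linking target internals directly. A standalone model of that registry pattern; the names here are illustrative, not LLVM's TargetRegistry API:

  #include <map>
  #include <string>

  struct MCDisassembler;                                   // opaque here
  typedef const MCDisassembler *(*DisassemblerCtor)();

  static std::map<std::string, DisassemblerCtor> &registry() {
    static std::map<std::string, DisassemblerCtor> R;      // one global table
    return R;
  }

  static void registerDisassembler(const std::string &T, DisassemblerCtor C) {
    registry()[T] = C;                                     // target -> factory
  }

  static const MCDisassembler *createFor(const std::string &T) {
    std::map<std::string, DisassemblerCtor>::iterator I = registry().find(T);
    return I == registry().end() ? 0 : I->second();
  }

  static const MCDisassembler *nullCtor() { return 0; }    // stub, like above

  int main() {
    registerDisassembler("x86-32", &nullCtor);
    return createFor("x86-32") == 0 ? 0 : 1;
  }
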
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 220831d..b311a6e 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenFastISel.inc \
X86GenCallingConv.inc X86GenSubtarget.inc
-DIRS = AsmPrinter AsmParser TargetInfo
+DIRS = AsmPrinter AsmParser Disassembler TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4497931..4892e17 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -595,7 +595,6 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6a3577a..a9a78be 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -252,8 +252,8 @@ namespace {
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
else if (AM.BlockAddr)
- Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
- true /*AM.SymbolFlags*/);
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, AM.SymbolFlags);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
@@ -777,7 +777,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.SymbolFlags = J->getTargetFlags();
} else {
AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
if (N.getOpcode() == X86ISD::WrapperRIP)
@@ -808,7 +808,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.SymbolFlags = J->getTargetFlags();
} else {
AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
return false;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6018cf5..d80b8ec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -373,13 +373,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
- // Use the default ISD::DBG_STOPPOINT.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing()) {
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
@@ -978,6 +975,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
+ // Divide and remainder operations have no vector equivalent and can
+ // trap. Do a custom widening for these operations in which we never
+ // generate more divides/remainders than the original vector width.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ if (!isTypeLegal((MVT::SimpleValueType)VT)) {
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
+ }
+ }
+
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
@@ -4722,18 +4732,27 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
- unsigned WrapperKind = X86ISD::Wrapper;
+ // Create the TargetBlockAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
CodeModel::Model M = getTargetMachine().getCodeModel();
+ BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+ /*isTarget=*/true, OpFlags);
+
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- WrapperKind = X86ISD::WrapperRIP;
-
- DebugLoc DL = Op.getDebugLoc();
-
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
return Result;
}
@@ -7164,6 +7183,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(edx.getValue(1));
return;
}
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
+ return;
+ }
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
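
The SDIV/UDIV/SREM/UREM handling above unrolls an illegal vector type to scalar operations over the original element count instead of widening the vector, because a padded lane could divide by zero and trap. A standalone illustration of that policy:

  #include <cstdio>

  int main() {
    // A v3i32 divide widened to v4i32: the 4th lane is padding whose
    // divisor may be 0, so only the original 3 lanes are unrolled.
    int a[4] = {6, 9, 12, 1}, b[4] = {3, 3, 3, 0}, r[4] = {0, 0, 0, 0};
    const int OrigElts = 3;
    for (int i = 0; i < OrigElts; ++i)
      r[i] = a[i] / b[i];                  // never divides the padding lane
    std::printf("%d %d %d\n", r[0], r[1], r[2]);  // 2 3 4
    return 0;
  }
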
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index a01534b..b5fa862 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1663,7 +1663,7 @@ def : Pat<(X86tcret GR64:$dst, imm:$off),
(TCRETURNri64 GR64:$dst, imm:$off)>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1ddceb1..a37013d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3133,7 +3133,6 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a79f262..90ef1f4 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -718,7 +718,6 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-
def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -3506,16 +3505,6 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
[(set GR32:$dst, (fsload addr:$src))]>, SegFS;
//===----------------------------------------------------------------------===//
-// DWARF Pseudo Instructions
-//
-
-def DWARF_LOC : I<0, Pseudo, (outs),
- (ins i32imm:$line, i32imm:$col, i32imm:$file),
- ".loc\t$file $line $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
- (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
let isTerminator = 1, isReturn = 1, isBarrier = 1,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ee63d56..dfdd4ce 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2820,40 +2820,40 @@ defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
let Constraints = "$src1 = $dst" in {
def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i16imm:$src3),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32imm:$src3),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>, OpSize;
def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>, OpSize;
}
// palignr patterns.
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i16 imm:$src3)),
+def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)),
(PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(int_x86_ssse3_palign_r VR64:$src1,
(memop64 addr:$src2),
- (i16 imm:$src3)),
+ (i8 imm:$src3)),
(PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
-def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i32 imm:$src3)),
+def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)),
(PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1,
(memopv2i64 addr:$src2),
- (i32 imm:$src3)),
+ (i8 imm:$src3)),
(PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 0792bdd..ce06f0f 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -426,83 +426,77 @@ X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
#if defined (X86_64_JIT)
- JCE.startGVStub(GV, 8, 8);
+ JCE.startGVStub(BS, GV, 8, 8);
JCE.emitWordLE((unsigned)(intptr_t)ptr);
JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32));
#else
- JCE.startGVStub(GV, 4, 4);
+ JCE.startGVStub(BS, GV, 4, 4);
JCE.emitWordLE((intptr_t)ptr);
#endif
- return JCE.finishGVStub(GV);
+ return JCE.finishGVStub(BS);
}
-void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+ // The 64-bit stub contains:
+ // movabs r10 <- 8-byte-target-address # 10 bytes
+ // call|jmp *r10 # 3 bytes
+ // The 32-bit stub contains a 5-byte call|jmp.
+ // If the stub is a call to the compilation callback, an extra byte is added
+ // to mark it as a stub.
+ StubLayout Result = {14, 4};
+ return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
// Note, we cast to intptr_t here to silence a -pedantic warning that
// complains about casting a function pointer to a normal pointer.
#if defined (X86_32_JIT) && !defined (_MSC_VER)
- bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback &&
- Fn != (void*)(intptr_t)X86CompilationCallback_SSE);
+ bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
+ Target != (void*)(intptr_t)X86CompilationCallback_SSE);
#else
- bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback;
+ bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
#endif
+ JCE.emitAlignment(4);
+ void *Result = (void*)JCE.getCurrentPCValue();
if (NotCC) {
#if defined (X86_64_JIT)
- JCE.startGVStub(F, 13, 4);
JCE.emitByte(0x49); // REX prefix
JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Fn);
- JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
JCE.emitByte(0x41); // REX prefix
JCE.emitByte(0xFF); // jmpq *r10
JCE.emitByte(2 | (4 << 3) | (3 << 6));
#else
- JCE.startGVStub(F, 5, 4);
JCE.emitByte(0xE9);
- JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
#endif
- return JCE.finishGVStub(F);
+ return Result;
}
#if defined (X86_64_JIT)
- JCE.startGVStub(F, 14, 4);
JCE.emitByte(0x49); // REX prefix
JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Fn);
- JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
JCE.emitByte(0x41); // REX prefix
JCE.emitByte(0xFF); // callq *r10
JCE.emitByte(2 | (2 << 3) | (3 << 6));
#else
- JCE.startGVStub(F, 6, 4);
JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
- JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
#endif
// This used to use 0xCD, but that value is used by JITMemoryManager to
// initialize the buffer with garbage, which means it may follow a
// noreturn function call, confusing X86CompilationCallback2. PR 4929.
JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
- return JCE.finishGVStub(F);
-}
-
-void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub,
- JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
- // complains about casting a function pointer to a normal pointer.
- JCE.startGVStub(F, Stub, 5);
- JCE.emitByte(0xE9);
-#if defined (X86_64_JIT) && !defined (NDEBUG)
- // Yes, we need both of these casts, or some broken versions of GCC (4.2.4)
- // get the signed-ness of the expression wrong. Go figure.
- intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5;
- assert(((Displacement << 32) >> 32) == Displacement
- && "PIC displacement does not fit in displacement field!");
-#endif
- JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
- JCE.finishGVStub(F);
+ return Result;
}
/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
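
The {14, 4} StubLayout above is the worst case among the stubs emitted in this file; the total falls out of the emitByte/emitWordLE sequences for the x86-64 compilation-callback stub. A worked count:

  #include <cstdio>

  int main() {
    const int movabs = 2 + 8;  // 0x49 0xBA (movabsq r10) + 8-byte address
    const int branch = 3;      // 0x41 0xFF 0xD2/0xE2 (call|jmp *r10)
    const int marker = 1;      // trailing 0xCE identifying a callback stub
    std::printf("size=%d align=%d\n", movabs + branch + marker, 4);  // 14 4
    return 0;
  }
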
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index c381433..238420c 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -43,18 +43,16 @@ namespace llvm {
virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE);
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on X86.
+ virtual StubLayout getStubLayout();
+
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
- virtual void *emitFunctionStub(const Function* F, void *Fn,
+ virtual void *emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE);
- /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to
- /// emit a small native function that simply calls Fn. Emit the stub into
- /// the supplied buffer.
- virtual void emitFunctionStubAtAddr(const Function* F, void *Fn,
- void *Buffer, JITCodeEmitter &JCE);
-
/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
/// specific basic block.
virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f577fcf..33852bd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1262,7 +1262,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
else if (RetOpcode== X86::TCRETURNri64)
BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
else
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index b901c14..661f560 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -28,6 +28,21 @@ using namespace llvm;
#include <intrin.h>
#endif
+/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
+}
+
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
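
ClassifyBlockAddressReference above feeds the two consumers seen earlier: LowerBlockAddress wraps the address and adds the PIC base when required, and the asm printer maps the flag to a symbol suffix. A compressed standalone model of that pipeline; the flag names mirror X86II but are plain enums here:

  #include <cstdio>

  enum Flag { MO_NO_FLAG, MO_PIC_BASE_OFFSET, MO_GOTOFF };

  static Flag classify(bool picGOT, bool picStub) {
    if (picGOT)  return MO_GOTOFF;           // 32-bit ELF PIC
    if (picStub) return MO_PIC_BASE_OFFSET;  // Darwin/32 PIC
    return MO_NO_FLAG;                       // direct static reference
  }

  static const char *suffix(Flag F) {        // asm-printer side
    return F == MO_GOTOFF ? "@GOTOFF" : "";  // others leave the name alone
  }

  int main() {
    std::printf("[%s]\n", suffix(classify(true, false)));  // [@GOTOFF]
    return 0;
  }
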
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 23f2841..fb457dd 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -199,6 +199,11 @@ public:
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM)const;
+ /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyBlockAddressReference() const;
+
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 00dcce6..f310456 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -142,10 +142,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Debug
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
maxStoresPerMemset = 4;
maxStoresPerMemmove = maxStoresPerMemcpy = 2;
@@ -295,7 +291,7 @@ LowerBlockAddress(SDValue Op, SelectionDAG &DAG)
DebugLoc DL = Op.getDebugLoc();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
}