summaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2012-08-15 19:34:23 +0000
committerdim <dim@FreeBSD.org>2012-08-15 19:34:23 +0000
commit721c201bd55ffb73cb2ba8d39e0570fa38c44e15 (patch)
treeeacfc83d988e4b9d11114387ae7dc41243f2a363 /lib/Target/ARM
parent2b2816e083a455f7a656ae88b0fd059d1688bb36 (diff)
downloadFreeBSD-src-721c201bd55ffb73cb2ba8d39e0570fa38c44e15.zip
FreeBSD-src-721c201bd55ffb73cb2ba8d39e0570fa38c44e15.tar.gz
Vendor import of llvm trunk r161861:
http://llvm.org/svn/llvm-project/llvm/trunk@161861
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.td10
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp106
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h6
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp540
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h29
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp35
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h5
-rw-r--r--lib/Target/ARM/ARMCallingConv.td31
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp76
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp76
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp15
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp362
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp26
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp417
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp669
-rw-r--r--lib/Target/ARM/ARMISelLowering.h30
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td548
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td895
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td48
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td292
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td93
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp12
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td32
-rw-r--r--lib/Target/ARM/ARMSchedule.td24
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td50
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td58
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp37
-rw-r--r--lib/Target/ARM/ARMSubtarget.h5
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp43
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h4
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp341
-rw-r--r--lib/Target/ARM/CMakeLists.txt2
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp199
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp140
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h1
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp177
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h106
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp8
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp28
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp114
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp49
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp13
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp4
-rw-r--r--lib/Target/ARM/README.txt21
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp8
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp11
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h3
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp8
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp54
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h5
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp1
57 files changed, 3251 insertions, 2663 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 9b0cb0c..69e2346 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
FeatureAvoidPartialCPSR]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, GenericItineraries, Features>;
+ : Processor<Name, NoItineraries, Features>;
// V4 Processors.
def : ProcNoItin<"generic", []>;
@@ -204,13 +204,13 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureDSPThumb2]>;
// V7a Processors.
-def : Processor<"cortex-a8", CortexA8Itineraries,
+def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
FeatureHasRAS]>;
@@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP2,
+ FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 410790a..8536b94 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,8 +23,8 @@
#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
@@ -283,9 +283,16 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
}
}
-void ARMAsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer.ForceCodeRegion();
+void ARMAsmPrinter::EmitFunctionBodyEnd() {
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
OutStreamer.EmitAssemblerFlag(MCAF_Code16);
OutStreamer.EmitThumbFunc(CurrentFnSym);
@@ -415,7 +422,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
case 'a': // Print as a memory address.
if (MI->getOperand(OpNum).isReg()) {
O << "["
@@ -434,15 +443,18 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printOperand(MI, OpNum, O);
return false;
case 'y': // Print a VFP single precision register as indexed double.
- // This uses the ordering of the alias table to get the first 'd' register
- // that overlaps the 's' register. Also, s0 is an odd register, hence the
- // odd modulus check below.
if (MI->getOperand(OpNum).isReg()) {
unsigned Reg = MI->getOperand(OpNum).getReg();
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
- O << ARMInstPrinter::getRegisterName(TRI->getAliasSet(Reg)[0]) <<
- (((Reg % 2) == 1) ? "[0]" : "[1]");
- return false;
+ // Find the 'd' register that has this 's' register as a sub-register,
+ // and determine the lane number.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (!ARM::DPRRegClass.contains(*SR))
+ continue;
+ bool Lane0 = TRI->getSubReg(*SR, ARM::ssub_0) == Reg;
+ O << ARMInstPrinter::getRegisterName(*SR) << (Lane0 ? "[0]" : "[1]");
+ return false;
+ }
}
return true;
case 'B': // Bitwise inverse of integer or symbol without a preceding #.
@@ -517,10 +529,23 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
+ // This modifier is not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'H': // The highest-numbered register of a pair.
return true;
+ case 'H': // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+ RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
+
+ unsigned Reg = RC.getRegister(RegIdx);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
}
}
@@ -934,13 +959,13 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
- // Tag the jump table appropriately for precise disassembly.
- OutStreamer.EmitJumpTable32Region();
-
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -969,6 +994,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
OutContext);
OutStreamer.EmitValue(Expr, 4);
}
+ // Mark the end of jump table data-in-code region.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
@@ -978,15 +1005,6 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
- // Emit a label for the jump table.
- if (MI->getOpcode() == ARM::t2TBB_JT) {
- OutStreamer.EmitJumpTable8Region();
- } else if (MI->getOpcode() == ARM::t2TBH_JT) {
- OutStreamer.EmitJumpTable16Region();
- } else {
- OutStreamer.EmitJumpTable32Region();
- }
-
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
@@ -995,10 +1013,15 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
unsigned OffsetWidth = 4;
- if (MI->getOpcode() == ARM::t2TBB_JT)
+ if (MI->getOpcode() == ARM::t2TBB_JT) {
OffsetWidth = 1;
- else if (MI->getOpcode() == ARM::t2TBH_JT)
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT8);
+ } else if (MI->getOpcode() == ARM::t2TBH_JT) {
OffsetWidth = 2;
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT16);
+ }
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
@@ -1031,6 +1054,11 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
OutStreamer.EmitValue(Expr, OffsetWidth);
}
+ // Mark the end of jump table data-in-code region. 32-bit offsets use
+ // actual branch instructions here, so we don't mark those as a data-region
+ // at all.
+ if (OffsetWidth != 4)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
@@ -1121,8 +1149,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(SrcReg == ARM::SP &&
"Only stack pointer as a source reg is supported");
for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
- i != NumOps; ++i)
- RegList.push_back(MI->getOperand(i).getReg());
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporary to workaround PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
@@ -1208,8 +1242,11 @@ extern cl::opt<bool> EnableARMEHABI;
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY)
- OutStreamer.EmitCodeRegion();
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ InConstantPool = false;
+ }
// Emit unwinding stuff for frame-related instructions
if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
@@ -1565,9 +1602,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- // Mark the constant pool entry as data if we're not already in a data
- // region.
- OutStreamer.EmitDataRegion();
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ InConstantPool = true;
+ }
+
OutStreamer.EmitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index af3f75a..3555e8f5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -44,9 +44,12 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// MachineFunction.
const MachineConstantPool *MCP;
+ /// InConstantPool - Maintain state when emitting a sequence of constant
+ /// pool entries so we can properly mark them as data regions.
+ bool InConstantPool;
public:
explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
@@ -70,6 +73,7 @@ public:
bool runOnMachineFunction(MachineFunction &F);
virtual void EmitConstantPool() {} // we emit constant pools customly!
+ virtual void EmitFunctionBodyEnd();
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c6280f8..057fd71 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -51,9 +51,9 @@ WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
- unsigned MLxOpc; // MLA / MLS opcode
- unsigned MulOpc; // Expanded multiplication opcode
- unsigned AddSubOpc; // Expanded add / sub opcode
+ uint16_t MLxOpc; // MLA / MLS opcode
+ uint16_t MulOpc; // Expanded multiplication opcode
+ uint16_t AddSubOpc; // Expanded add / sub opcode
bool NegAcc; // True if the acc is negated before the add / sub.
bool HasLane; // True if instruction has an extra "lane" operand.
};
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
}
break;
case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
- case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
}
break;
case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -1531,11 +1575,11 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
- unsigned PseudoOpc;
- unsigned MachineOpc;
+ uint16_t PseudoOpc;
+ uint16_t MachineOpc;
};
-static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::ADDSri, ARM::ADDri},
{ARM::ADDSrr, ARM::ADDrr},
{ARM::ADDSrsi, ARM::ADDrsi},
@@ -1563,14 +1607,9 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
};
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
- static const int NPairs =
- sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
- for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
- *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) {
- if (OldOpc == OpcPair->PseudoOpc) {
- return OpcPair->MachineOpc;
- }
- }
+ for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
+ if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
+ return AddSubFlagsOpcodeMap[i].MachineOpc;
return 0;
}
@@ -1742,20 +1781,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
- int &CmpValue) const {
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+ case ARM::CMPrr:
+ case ARM::t2CMPrr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case ARM::TSTri:
case ARM::t2TSTri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = MI->getOperand(1).getImm();
CmpValue = 0;
return true;
@@ -1793,20 +1845,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
-bool ARMBaseInstrInfo::
-OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
- if (CmpValue != 0)
- return false;
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
- return false;
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
- MachineInstr *MI = &*DI;
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
@@ -1825,32 +1924,49 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
}
- // Conservatively refuse to convert an instruction which isn't in the same BB
- // as the comparison.
- if (MI->getParent() != CmpInstr->getParent())
- return false;
-
- // Check that CPSR isn't set between the comparison instruction and the one we
- // want to change.
- MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
+ // There are two possible candidates which can be changed to set CPSR:
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
+ MachineInstr *Sub = NULL;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = NULL;
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ // Conservatively refuse to convert an instruction which isn't in the same
+ // BB as the comparison.
+ // For CMPri, we need to check Sub, thus we can't return here.
+ if (CmpInstr->getOpcode() == ARM::CMPri ||
+ CmpInstr->getOpcode() == ARM::t2CMPri)
+ MI = NULL;
+ else
+ return false;
+ }
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
- return false;
- if (!MO.isReg()) continue;
-
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
- if (MO.getReg() == ARM::CPSR)
- return false;
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ Sub = &*I;
+ break;
}
if (I == B)
@@ -1858,7 +1974,13 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
return false;
}
- // Set the "zero" bit in CPSR.
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
@@ -1894,13 +2016,17 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
case ARM::EORri:
case ARM::t2EORrr:
case ARM::t2EORri: {
- // Scan forward for the use of CPSR, if it's a conditional code requires
- // checking of V bit, then this is not safe to do. If we can't find the
- // CPSR use (i.e. used in another block), then it's not safe to perform
- // the optimization.
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code requires
+ // checking of V bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
- E = MI->getParent()->end();
+ E = CmpInstr->getParent()->end();
while (!isSafe && ++I != E) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands();
@@ -1918,28 +2044,56 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
// Condition code is after the operand before CPSR.
ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
- switch (CC) {
- default:
- isSafe = true;
- break;
- case ARMCC::VS:
- case ARMCC::VC:
- case ARMCC::GE:
- case ARMCC::LT:
- case ARMCC::GT:
- case ARMCC::LE:
- return false;
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
+ NewCC));
}
+ else
+ switch (CC) {
+ default:
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
}
}
- if (!isSafe)
- return false;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
+ }
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
CmpInstr->eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
return true;
}
}
@@ -2071,9 +2225,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps)
- return UOps;
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0)
+ return ItinUOps;
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2088,7 +2242,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
//
// On Cortex-A8, each pair of register loads / stores can be scheduled on the
// same cycle. The scheduling for the first load / store must be done
- // separately by assuming the the address is not 64-bit aligned.
+ // separately by assuming the address is not 64-bit aligned.
//
// On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
// is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
@@ -2147,19 +2301,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- UOps = (NumRegs / 2);
+ int A8UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++UOps;
- return UOps;
+ ++A8UOps;
+ return A8UOps;
} else if (Subtarget.isCortexA9()) {
- UOps = (NumRegs / 2);
+ int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) ||
!MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
- ++UOps;
- return UOps;
+ ++A9UOps;
+ return A9UOps;
} else {
// Assume the worst.
return NumRegs;
@@ -2478,82 +2632,14 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
return II;
}
-int
-ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
- return 1;
-
- if (!ItinData || ItinData->isEmpty())
- return DefMI->mayLoad() ? 3 : 1;
-
- const MCInstrDesc *DefMCID = &DefMI->getDesc();
- const MCInstrDesc *UseMCID = &UseMI->getDesc();
- const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- unsigned Reg = DefMO.getReg();
- if (Reg == ARM::CPSR) {
- if (DefMI->getOpcode() == ARM::FMSTAT) {
- // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
- return Subtarget.isCortexA9() ? 1 : 20;
- }
-
- // CPSR set and branch can be paired in the same cycle.
- if (UseMI->isBranch())
- return 0;
-
- // Otherwise it takes the instruction latency (generally one).
- int Latency = getInstrLatency(ItinData, DefMI);
-
- // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
- // its uses. Instructions which are otherwise scheduled between them may
- // incur a code size penalty (not able to use the CPSR setting 16-bit
- // instructions).
- if (Latency > 0 && Subtarget.isThumb2()) {
- const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- --Latency;
- }
- return Latency;
- }
-
- unsigned DefAlign = DefMI->hasOneMemOperand()
- ? (*DefMI->memoperands_begin())->getAlignment() : 0;
- unsigned UseAlign = UseMI->hasOneMemOperand()
- ? (*UseMI->memoperands_begin())->getAlignment() : 0;
-
- unsigned DefAdj = 0;
- if (DefMI->isBundle()) {
- DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
- return 1;
- DefMCID = &DefMI->getDesc();
- }
- unsigned UseAdj = 0;
- if (UseMI->isBundle()) {
- unsigned NewUseIdx;
- const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
- Reg, NewUseIdx, UseAdj);
- if (NewUseMI) {
- UseMI = NewUseMI;
- UseIdx = NewUseIdx;
- UseMCID = &UseMI->getDesc();
- }
- }
-
- int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
- *UseMCID, UseIdx, UseAlign);
- int Adj = DefAdj + UseAdj;
- if (Adj) {
- Latency -= (int)(DefAdj + UseAdj);
- if (Latency < 1)
- return 1;
- }
-
- if (Latency > 1 &&
- (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+/// Return the number of cycles to add to (or subtract from) the static
+/// itinerary based on the def opcode and alignment. The caller will ensure that
+/// adjusted latency is at least one cycle.
+static int adjustDefLatency(const ARMSubtarget &Subtarget,
+ const MachineInstr *DefMI,
+ const MCInstrDesc *DefMCID, unsigned DefAlign) {
+ int Adjust = 0;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID->getOpcode()) {
@@ -2564,7 +2650,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
- --Latency;
+ --Adjust;
break;
}
case ARM::t2LDRs:
@@ -2574,13 +2660,13 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// Thumb2 mode: lsl only.
unsigned ShAmt = DefMI->getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 2)
- --Latency;
+ --Adjust;
break;
}
}
}
- if (DefAlign < 8 && Subtarget.isCortexA9())
+ if (DefAlign < 8 && Subtarget.isCortexA9()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -2689,10 +2775,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4LNq32_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increases by one.
- ++Latency;
+ ++Adjust;
break;
}
+ }
+ return Adjust;
+}
+
+
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ // No operand latency. The caller may fall back to getInstrLatency.
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ DefMCID = &DefMI->getDesc();
+ }
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef()) {
+ return 1;
+ }
+
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+
+ if (Reg == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMI->isBranch())
+ return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ unsigned Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
+ }
+
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ // Get the itinerary's latency if possible, and handle variable_ops.
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ // Unable to find operand latency. The caller may resort to getInstrLatency.
+ if (Latency < 0)
+ return Latency;
+
+ // Adjust for IT block position.
+ int Adj = DefAdj + UseAdj;
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ // Return the itinerary latency, which may be zero but not less than zero.
return Latency;
}
@@ -2892,22 +3069,20 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
return 1;
// If the second MI is predicated, then there is an implicit use dependency.
- return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
- DepMI->getNumOperands());
+ return getInstrLatency(ItinData, DefMI);
}
-int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
if (MI->isCopyLike() || MI->isInsertSubreg() ||
MI->isRegSequence() || MI->isImplicitDef())
return 1;
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
+ // An instruction scheduler typically runs on unbundled instructions, however
+ // other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
- int Latency = 0;
+ unsigned Latency = 0;
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
@@ -2918,15 +3093,33 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &MCID = MI->getDesc();
- unsigned Class = MCID.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
- if (UOps)
- return ItinData->getStageLatency(Class);
- return getNumMicroOps(ItinData, MI);
+ }
+ // Be sure to call getStageLatency for an empty itinerary in case it has a
+ // valid MinLatency property.
+ if (!ItinData)
+ return MI->mayLoad() ? 3 : 1;
+
+ unsigned Class = MCID.getSchedClass();
+
+ // For instructions with variable uops, use uops as latency.
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
+ return getNumMicroOps(ItinData, MI);
+
+ // For the common case, fall back on the itinerary's latency.
+ unsigned Latency = ItinData->getStageLatency(Class);
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ unsigned DefAlign = MI->hasOneMemOperand()
+ ? (*MI->memoperands_begin())->getAlignment() : 0;
+ int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ return Latency;
}
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
@@ -2960,7 +3153,10 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
+ /*FindMin=*/false);
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
if (Latency <= 3)
return false;
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2fe8507..1a10a4a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -186,16 +186,20 @@ public:
return NumCycles == 1;
}
- /// AnalyzeCompare - For a comparison instruction, return the source register
- /// in SrcReg and the value it compares against in CmpValue. Return true if
- /// the comparison instruction can be analyzed.
- virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- int &CmpMask, int &CmpValue) const;
-
- /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
- /// that we can remove a "comparison with zero".
- virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- int CmpMask, int CmpValue,
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const;
+
+ /// optimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero"; Remove a redundant CMP
+ /// instruction if the flags can be updated in the same way by an earlier
+ /// instruction such as SUB.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
@@ -249,8 +253,9 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
- int getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI, unsigned *PredCost = 0) const;
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3907f75..9deb96e 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,12 +62,26 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ bool ghcCall = false;
+
+ if (MF) {
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
+ if (ghcCall) {
+ return CSR_GHC_SaveList;
+ }
+ else {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ }
}
const uint32_t*
ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
- return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
BitVector ARMBaseRegisterInfo::
@@ -257,8 +271,9 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
}
const TargetRegisterClass *
-ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
- return ARM::GPRRegisterClass;
+ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::GPRRegClass;
}
const TargetRegisterClass *
@@ -369,7 +384,7 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
};
// We only support even/odd hints for GPR and rGPR.
- if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+ if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass)
return RC->getRawAllocationOrder(MF);
if (HintType == ARMRI::RegPairEven) {
@@ -712,6 +727,11 @@ requiresRegisterScavenging(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
+trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
@@ -932,7 +952,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+ const MachineFunction &MF = *MBB->getParent();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset));
@@ -1110,7 +1131,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Must be addrmode4/6.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
- ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index af79351..da29f7e 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -109,7 +109,8 @@ public:
SmallVectorImpl<unsigned> &SubIndices,
unsigned &NewSubIdx) const;
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
const TargetRegisterClass*
getCrossCopyRegClass(const TargetRegisterClass *RC) const;
@@ -173,6 +174,8 @@ public:
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index b9a2512..bda1517 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+// GHC set of callee saved regs is empty as all those regs are
+// used for passing STG regs around
+// add is a workaround for not being able to compile empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 32ef345..e81b4cc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -254,7 +254,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
uint32_t Binary;
Binary = Imm12 & 0xfff;
@@ -296,7 +296,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
// Special value for #-0
@@ -352,6 +352,12 @@ namespace {
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
intptr_t JTBase = 0) const;
+ unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
};
}
@@ -440,7 +446,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg())
- return getARMRegisterNumbering(MO.getReg());
+ return II->getRegisterInfo().getEncodingValue(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isGlobal())
@@ -776,7 +782,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement.
Binary |= 1 << ARMII::I_BitShift;
@@ -963,7 +969,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
if (Rs) {
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
// Encode shift_imm bit[11:7].
@@ -1014,7 +1020,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
else if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
if (MCID.Opcode == ARM::MOVi16) {
// Get immediate from MI.
@@ -1064,7 +1070,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (!isUnary) {
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else {
Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
++OpIdx;
@@ -1081,7 +1087,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (MO.isReg()) {
// Encode register Rm.
- emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
return;
}
@@ -1124,14 +1130,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1158,7 +1164,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
Binary |= 1 << ARMII::I_BitShift;
assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
// Set bit[3:0] to the corresponding Rm register
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
// If this instr is in scaled register offset/index instruction, set
// shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
@@ -1202,7 +1208,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1221,7 +1227,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// If this instr is in register offset/index encoding, set bit[3:0]
// to the corresponding Rm register.
if (MO2.getReg()) {
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
emitWordLE(Binary);
return;
}
@@ -1287,7 +1293,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
- unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
RegNum < 16);
Binary |= 0x1 << RegNum;
@@ -1530,7 +1536,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
// The return register is LR.
- Binary |= getARMRegisterNumbering(ARM::LR);
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
else
// otherwise, set the return register
Binary |= getMachineOpValue(MI, 0);
@@ -1538,11 +1544,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
- RegD = getARMRegisterNumbering(RegD);
+ bool isSPVFP = ARM::SPRRegClass.contains(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
if (!isSPVFP)
Binary |= RegD << ARMII::RegRdShift;
else {
@@ -1552,11 +1559,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
- RegN = getARMRegisterNumbering(RegN);
+ bool isSPVFP = ARM::SPRRegClass.contains(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
if (!isSPVFP)
Binary |= RegN << ARMII::RegRnShift;
else {
@@ -1566,11 +1574,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
- RegM = getARMRegisterNumbering(RegM);
+ bool isSPVFP = ARM::SPRRegClass.contains(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
if (!isSPVFP)
Binary |= RegM;
else {
@@ -1757,28 +1766,31 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegD = getARMRegisterNumbering(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
Binary |= (RegD & 0xf) << ARMII::RegRdShift;
Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
return Binary;
}
-static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegN = getARMRegisterNumbering(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
Binary |= (RegN & 0xf) << ARMII::RegRnShift;
Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
return Binary;
}
-static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegM = getARMRegisterNumbering(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
Binary |= (RegM & 0xf);
Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
return Binary;
@@ -1812,7 +1824,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, RegNOpIdx);
@@ -1841,7 +1853,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(1).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, 0);
emitWordLE(Binary);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index fc35c7c..a953985 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -69,27 +69,6 @@ static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
return 0;
}
-/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
-/// add padding such that:
-///
-/// 1. The result is aligned to 1 << LogAlign.
-///
-/// 2. No other value of the unknown bits would require more padding.
-///
-/// This may add more padding than is required to satisfy just one of the
-/// constraints. It is necessary to compute alignment this way to guarantee
-/// that we don't underestimate the padding before an aligned block. If the
-/// real padding before a block is larger than we think, constant pool entries
-/// may go out of range.
-static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
- unsigned KnownBits) {
- // Add the worst possible padding that the unknown bits could cause.
- Offset += UnknownPadding(LogAlign, KnownBits);
-
- // Then align the result.
- return RoundUpToAlignment(Offset, 1u << LogAlign);
-}
-
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -109,7 +88,12 @@ namespace {
/// Offset - Distance from the beginning of the function to the beginning
/// of this basic block.
///
- /// The offset is always aligned as required by the basic block.
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
unsigned Offset;
/// Size - Size of the basic block in bytes. If the block contains
@@ -140,7 +124,12 @@ namespace {
/// This number should be used to predict worst case padding when
/// splitting the block.
unsigned internalKnownBits() const {
- return Unalign ? Unalign : KnownBits;
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
}
/// Compute the offset immediately following this block. If LogAlign is
@@ -152,7 +141,7 @@ namespace {
if (!LA)
return PO;
// Add alignment padding from the terminator.
- return WorstCaseAlign(PO, LA, internalKnownBits());
+ return PO + UnknownPadding(LA, internalKnownBits());
}
/// Compute the number of known low bits of postOffset. If this block
@@ -342,9 +331,7 @@ void ARMConstantIslands::verify() {
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- unsigned Align = MBB->getAlignment();
unsigned MBBId = MBB->getNumber();
- assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
@@ -428,7 +415,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF->EnsureAlignment(2); // 2 = log2(4)
+ MF->ensureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
@@ -529,7 +516,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
- MF->EnsureAlignment(BB->getAlignment());
+ MF->ensureAlignment(BB->getAlignment());
// Order the entries in BB by descending alignment. That ensures correct
// alignment of all entries as long as BB is sufficiently aligned. Keep
@@ -828,7 +815,7 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
// tBR_JTr contains a .align 2 directive.
if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
BBI.PostAlign = 2;
- MBB->getParent()->EnsureAlignment(2);
+ MBB->getParent()->ensureAlignment(2);
}
}
@@ -1045,7 +1032,6 @@ bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = getOffsetOf(CPEMI);
- assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
DEBUG({
@@ -1256,11 +1242,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = isThumb1 ? 2 : 4;
- // End of UserBlock after adding a branch.
- unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
- UserBBI.postKnownBits());
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
@@ -1299,20 +1282,16 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// up the insertion point.
// Try to split the block so it's fully aligned. Compute the latest split
- // point where we can add a 4-byte branch instruction, and then
- // WorstCaseAlign to LogAlign.
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
unsigned LogAlign = MF->getAlignment();
assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
unsigned KnownBits = UserBBI.internalKnownBits();
unsigned UPad = UnknownPadding(LogAlign, KnownBits);
- unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
- // Account for alignment and unknown padding.
- BaseInsertOffset &= ~((1u << LogAlign) - 1);
- BaseInsertOffset -= UPad;
-
// The 4 in the following is for the unconditional branch we'll be inserting
// (allows for long branch on Thumb1). Alignment of the island is handled
// inside isOffsetInRange.
@@ -1327,11 +1306,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// pool entries following this block; only the last one is in the water list.
// Back past any possible branches (allow for a conditional and a maximally
// long unconditional).
- if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
- BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset =
- WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
CPEMI->getOperand(2).getImm();
MachineBasicBlock::iterator MI = UserMI;
++MI;
@@ -1342,6 +1321,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
@@ -1353,9 +1333,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// reused within the block, but it doesn't matter much. Also assume CPEs
// are added in order with alignment padding. We may eventually be able
// to pack the aligned CPEs better.
- EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
- 1u << getCPELogAlign(U.CPEMI)) +
- U.CPEMI->getOperand(2).getImm();
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
CPUIndex++;
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5fc0360..15bb32e 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -459,22 +459,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0);
+ MIB.addReg(D0, getUndefRegState(SrcIsUndef));
if (NumRegs > 1 && TableEntry->copyAllListRegs)
- MIB.addReg(D1);
+ MIB.addReg(D1, getUndefRegState(SrcIsUndef));
if (NumRegs > 2 && TableEntry->copyAllListRegs)
- MIB.addReg(D2);
+ MIB.addReg(D2, getUndefRegState(SrcIsUndef));
if (NumRegs > 3 && TableEntry->copyAllListRegs)
- MIB.addReg(D3);
+ MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
- if (SrcIsKill) // Add an implicit kill for the super-reg.
+ if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
@@ -925,7 +926,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (isARM) {
AddDefaultPred(MIB3);
if (Opcode == ARM::MOV_ga_pcrel_ldr)
- MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
@@ -1008,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned Lane = TRI->getEncodingValue(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
&ARM::DPR_VFP2RegClass);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 2e1eaca..57f8116 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -47,11 +47,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static cl::opt<bool>
-DisableARMFastISel("disable-arm-fast-isel",
- cl::desc("Turn off experimental ARM fast-isel support"),
- cl::init(false), cl::Hidden);
-
extern cl::opt<bool> EnableARMLongCalls;
namespace {
@@ -92,8 +87,9 @@ class ARMFastISel : public FastISel {
LLVMContext *Context;
public:
- explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
- : FastISel(funcInfo),
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
TM(funcInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()) {
@@ -172,6 +168,7 @@ class ARMFastISel : public FastISel {
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
+ bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
private:
@@ -182,7 +179,6 @@ class ARMFastISel : public FastISel {
bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
-
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
@@ -195,21 +191,25 @@ class ARMFastISel : public FastISel {
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
- unsigned ARMSelectCallOp(const GlobalValue *GV);
+ unsigned ARMSelectCallOp(bool UseReg);
// Call handling routines.
private:
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
- unsigned &NumBytes);
+ unsigned &NumBytes,
+ bool isVarArg);
+ unsigned getLibcallReg(const Twine &Name);
bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes);
+ unsigned &NumBytes, bool isVarArg);
bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
// OptionalDef handling routines.
@@ -719,7 +719,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
MVT VT;
- if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
@@ -910,8 +910,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass
- : ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -1005,7 +1006,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
useAM3 = true;
}
}
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
break;
case MVT::i16:
if (isThumb2) {
@@ -1017,7 +1018,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
useAM3 = true;
}
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
break;
case MVT::i32:
if (isThumb2) {
@@ -1028,7 +1029,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
} else {
Opc = ARM::LDRi12;
}
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
@@ -1037,7 +1038,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
needVMOV = true;
VT = MVT::i32;
Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
} else {
Opc = ARM::VLDRS;
RC = TLI.getRegClassFor(VT);
@@ -1106,8 +1107,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass);
+ unsigned Res = createResultReg(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
@@ -1358,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(AddrReg));
- return true;
+ return true;
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -1423,12 +1425,12 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
if (!UseImm)
CmpOpc = ARM::t2CMPrr;
else
- CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+ CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
} else {
if (!UseImm)
CmpOpc = ARM::CMPrr;
else
- CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+ CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
}
break;
}
@@ -1491,8 +1493,9 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
// Now set a register based on the comparison. Explicitly set the predicates
// here.
unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
@@ -1516,7 +1519,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
unsigned Op = getRegForValue(V);
if (Op == 0) return false;
- unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ unsigned Result = createResultReg(&ARM::DPRRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VCVTDS), Result)
.addReg(Op));
@@ -1535,7 +1538,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
unsigned Op = getRegForValue(V);
if (Op == 0) return false;
- unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ unsigned Result = createResultReg(&ARM::SPRRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VCVTSD), Result)
.addReg(Op));
@@ -1736,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
// type and the target independent selector doesn't know how to handle it.
if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
-
+
unsigned Opc;
switch (ISDOpcode) {
default: return false;
@@ -1809,10 +1812,11 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// Call Handling Code
-// This is largely taken directly from CCAssignFnForNode - we don't support
-// varargs in FastISel so that part has been removed.
+// This is largely taken directly from CCAssignFnForNode
// TODO: We may not support all of this.
-CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
@@ -1825,18 +1829,26 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- TM.Options.FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
} else
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ // Fall through to soft float variant, variadic functions don't
+ // use hard floating point ABI.
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::GHC:
+ if (Return)
+ llvm_unreachable("Can't return in GHC call convention");
+ else
+ return CC_ARM_APCS_GHC;
}
}
@@ -1846,10 +1858,12 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
- unsigned &NumBytes) {
+ unsigned &NumBytes,
+ bool isVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
+ CCAssignFnForCall(CC, false, isVarArg));
// Check that we can handle all of the arguments. If we can't, then bail out
// now before we add code to the MBB.
@@ -1981,7 +1995,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes) {
+ unsigned &NumBytes, bool isVarArg) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -1991,8 +2005,8 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
- CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
// Copy all of the result registers out of their specified physreg.
if (RVLocs.size() == 2 && RetVT == MVT::f64) {
@@ -2041,9 +2055,6 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
- if (F.isVarArg())
- return false;
-
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
@@ -2053,7 +2064,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
+ F.isVarArg()));
const Value *RV = Ret->getOperand(0);
unsigned Reg = getRegForValue(RV);
@@ -2110,12 +2122,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return true;
}
-unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
- if (isThumb2) {
- return ARM::tBL;
- } else {
- return ARM::BL;
- }
+unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
+ if (UseReg)
+ return isThumb2 ? ARM::tBLXr : ARM::BLX;
+ else
+ return isThumb2 ? ARM::tBL : ARM::BL;
+}
+
+unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0, Name);
+ return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
}
// A quick function that will emit a call for a named libcall in F with the
@@ -2136,8 +2153,14 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
else if (!isTypeLegal(RetTy, RetVT))
return false;
- // TODO: For now if we have long calls specified we don't handle the call.
- if (EnableARMLongCalls) return false;
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
@@ -2170,23 +2193,36 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Handle the arguments now that we've gotten them.
SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, false))
return false;
+ unsigned CalleeReg = 0;
+ if (EnableARMLongCalls) {
+ CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
+ if (CalleeReg == 0) return false;
+ }
+
// Issue the call.
- MachineInstrBuilder MIB;
- unsigned CallOpc = ARMSelectCallOp(NULL);
- if (isThumb2)
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)))
- .addExternalSymbol(TLI.getLibcallName(Call));
- else
+ unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(CallOpc));
+ if (isThumb2) {
// Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addExternalSymbol(TLI.getLibcallName(Call)));
+ AddDefaultPred(MIB);
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
+ } else {
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
+ // Explicitly adding the predicate here.
+ AddDefaultPred(MIB);
+ }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@@ -2197,7 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
@@ -2213,22 +2249,15 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
- // Only handle global variable Callees.
- const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
- return false;
-
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
// TODO: Avoid some calling conventions?
- // Let SDISel handle vararg functions.
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
- return false;
+ bool isVarArg = FTy->isVarArg();
// Handle *simple* calls for now.
Type *RetTy = I->getType();
@@ -2239,8 +2268,15 @@ bool ARMFastISel::SelectCall(const Instruction *I,
RetVT != MVT::i8 && RetVT != MVT::i1)
return false;
- // TODO: For now if we have long calls specified we don't handle the call.
- if (EnableARMLongCalls) return false;
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
+ RetVT != MVT::i16 && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
@@ -2295,33 +2331,49 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Handle the arguments now that we've gotten them.
SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, isVarArg))
return false;
+ bool UseReg = false;
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || EnableARMLongCalls) UseReg = true;
+
+ unsigned CalleeReg = 0;
+ if (UseReg) {
+ if (IntrMemName)
+ CalleeReg = getLibcallReg(IntrMemName);
+ else
+ CalleeReg = getRegForValue(Callee);
+
+ if (CalleeReg == 0) return false;
+ }
+
// Issue the call.
- MachineInstrBuilder MIB;
- unsigned CallOpc = ARMSelectCallOp(GV);
- // Explicitly adding the predicate here.
+ unsigned CallOpc = ARMSelectCallOp(UseReg);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(CallOpc));
if(isThumb2) {
// Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)));
- if (!IntrMemName)
+ AddDefaultPred(MIB);
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
- else
+ else
MIB.addExternalSymbol(IntrMemName, 0);
} else {
- if (!IntrMemName)
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addGlobalAddress(GV, 0, 0));
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
else
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addExternalSymbol(IntrMemName, 0));
+ MIB.addExternalSymbol(IntrMemName, 0);
+
+ // Explicitly adding the predicate here.
+ AddDefaultPred(MIB);
}
-
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@@ -2332,7 +2384,8 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
+ return false;
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
@@ -2383,6 +2436,42 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ unsigned LdrOpc;
+ const TargetRegisterClass *RC;
+ if (isThumb2) {
+ LdrOpc = ARM::t2LDRi12;
+ RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
+ } else {
+ LdrOpc = ARM::LDRi12;
+ RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
+ }
+
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = FramePtr;
+
+ // Recursively load frame address
+ // ldr r0 [fp]
+ // ldr r0 [r0]
+ // ldr r0 [r0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(LdrOpc), DestReg)
+ .addReg(SrcReg).addImm(0));
+ SrcReg = DestReg;
+ }
+ UpdateValueMap(&I, SrcReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const MemTransferInst &MTI = cast<MemTransferInst>(I);
@@ -2406,10 +2495,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return true;
}
}
-
+
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
return false;
@@ -2421,20 +2510,24 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// Don't handle volatile.
if (MSI.isVolatile())
return false;
-
+
if (!MSI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MSI.getDestAddressSpace() > 255)
return false;
-
+
return SelectCall(&I, "memset");
}
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
+ return true;
+ }
}
}
bool ARMFastISel::SelectTrunc(const Instruction *I) {
- // The high bits for a type smaller than the register size are assumed to be
+ // The high bits for a type smaller than the register size are assumed to be
// undefined.
Value *Op = I->getOperand(0);
@@ -2522,6 +2615,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
return true;
}
+bool ARMFastISel::SelectShift(const Instruction *I,
+ ARM_AM::ShiftOpc ShiftTy) {
+ // We handle thumb2 mode by target independent selector
+ // or SelectionDAG ISel.
+ if (isThumb2)
+ return false;
+
+ // Only handle i32 now.
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+ if (DestVT != MVT::i32)
+ return false;
+
+ unsigned Opc = ARM::MOVsr;
+ unsigned ShiftImm;
+ Value *Src2Value = I->getOperand(1);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+ ShiftImm = CI->getZExtValue();
+
+ // Fall back to selection DAG isel if the shift amount
+ // is zero or greater than the width of the value type.
+ if (ShiftImm == 0 || ShiftImm >=32)
+ return false;
+
+ Opc = ARM::MOVsi;
+ }
+
+ Value *Src1Value = I->getOperand(0);
+ unsigned Reg1 = getRegForValue(Src1Value);
+ if (Reg1 == 0) return false;
+
+ unsigned Reg2;
+ if (Opc == ARM::MOVsr) {
+ Reg2 = getRegForValue(Src2Value);
+ if (Reg2 == 0) return false;
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ if(ResultReg == 0) return false;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg1);
+
+ if (Opc == ARM::MOVsi)
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+ else if (Opc == ARM::MOVsr) {
+ MIB.addReg(Reg2);
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+ }
+
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -2582,6 +2730,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
+ case Instruction::Shl:
+ return SelectShift(I, ARM_AM::lsl);
+ case Instruction::LShr:
+ return SelectShift(I, ARM_AM::lsr);
+ case Instruction::AShr:
+ return SelectShift(I, ARM_AM::asr);
default: break;
}
return false;
@@ -2625,7 +2779,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
// See if we can handle this address.
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
-
+
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
@@ -2634,15 +2788,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
namespace llvm {
- FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
// Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
- !DisableARMFastISel)
- return new ARMFastISel(funcInfo);
+ if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
+ return new ARMFastISel(funcInfo, libInfo);
return 0;
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 402ecb0..aee72d2 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,6 +15,8 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
int D8SpillFI = 0;
+ // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
@@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -790,7 +800,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// The writeback is only needed when emitting two vst1.64 instructions.
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
ARM::R4)
@@ -808,7 +818,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// 16-byte aligned vst1.64 with 4 d-regs, no writeback.
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
.addReg(ARM::R4).addImm(16).addReg(NextReg)
@@ -820,7 +830,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// 16-byte aligned vst1.64 with 2 d-regs.
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QPRRegisterClass);
+ &ARM::QPRRegClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
.addReg(ARM::R4).addImm(16).addReg(SupReg));
@@ -908,7 +918,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// 16-byte aligned vld1.64 with 4 d-regs and writeback.
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
.addReg(ARM::R4, RegState::Define)
.addReg(ARM::R4, RegState::Kill).addImm(16)
@@ -924,7 +934,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// 16-byte aligned vld1.64 with 4 d-regs, no writeback.
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
.addReg(ARM::R4).addImm(16)
.addReg(SupReg, RegState::ImplicitDefine));
@@ -935,7 +945,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// 16-byte aligned vld1.64 with 2 d-regs.
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QPRRegisterClass);
+ &ARM::QPRRegClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
.addReg(ARM::R4).addImm(16));
NextReg += 2;
@@ -1244,7 +1254,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
CanEliminateFrame = false;
}
- if (!ARM::GPRRegisterClass->contains(Reg))
+ if (!ARM::GPRRegClass.contains(Reg))
continue;
if (Spilled) {
@@ -1404,7 +1414,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
- const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1eafbbc..ee349a7 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
-static cl::opt<bool>
-DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
- cl::desc("Enable / disable ARM integer abs transform"),
- cl::init(false));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -210,29 +205,29 @@ private:
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes,
- unsigned *QOpcodes0, unsigned *QOpcodes1);
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes,
- unsigned *QOpcodes0, unsigned *QOpcodes1);
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes);
+ const uint16_t *DOpcodes, const uint16_t *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *Opcodes);
+ const uint16_t *Opcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -583,8 +578,6 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
-
-
//-----
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
@@ -1597,8 +1590,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
}
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1729,8 +1723,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
}
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1875,8 +1870,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes,
- unsigned *QOpcodes) {
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1994,7 +1989,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
}
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
- unsigned NumVecs, unsigned *Opcodes) {
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -2491,14 +2487,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (DisableARMIntABS)
- return NULL;
-
if (Subtarget->isThumb1Only())
return NULL;
- if (XORSrc0.getOpcode() != ISD::ADD ||
- XORSrc1.getOpcode() != ISD::SRA)
+ if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
return NULL;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
@@ -2509,16 +2501,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 &&
- ADDSrc0 == SRASrc0 &&
- XType.isInteger() &&
- SRAConstant != NULL &&
+ if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
+ XType.isInteger() && SRAConstant != NULL &&
Size == SRAConstant->getZExtValue()) {
-
- unsigned Opcode = ARM::ABS;
- if (Subtarget->isThumb2())
- Opcode = ARM::t2ABS;
-
+ unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
@@ -2893,176 +2879,199 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
- ARM::VLD2DUPd32 };
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
case ARMISD::VLD3DUP: {
- unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
- ARM::VLD3DUPd32Pseudo };
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
+ ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
return SelectVLDDup(N, false, 3, Opcodes);
}
case ARMISD::VLD4DUP: {
- unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
- ARM::VLD4DUPd32Pseudo };
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
+ ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
return SelectVLDDup(N, false, 4, Opcodes);
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
- ARM::VLD2DUPd32wb_fixed };
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
+ ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
case ARMISD::VLD3DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
- ARM::VLD3DUPd32Pseudo_UPD };
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
+ ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
return SelectVLDDup(N, true, 3, Opcodes);
}
case ARMISD::VLD4DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
- ARM::VLD4DUPd32Pseudo_UPD };
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
+ ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
return SelectVLDDup(N, true, 4, Opcodes);
}
case ARMISD::VLD1_UPD: {
- unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
- ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
- unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed,
- ARM::VLD1q16wb_fixed,
- ARM::VLD1q32wb_fixed,
- ARM::VLD1q64wb_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
+ ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed,
+ ARM::VLD1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed,
- ARM::VLD2d16wb_fixed,
- ARM::VLD2d32wb_fixed,
- ARM::VLD1q64wb_fixed};
- unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
- ARM::VLD2q16PseudoWB_fixed,
- ARM::VLD2q32PseudoWB_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD3_UPD: {
- unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
- ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
- unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
- ARM::VLD3q16Pseudo_UPD,
- ARM::VLD3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
- ARM::VLD3q16oddPseudo_UPD,
- ARM::VLD3q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
+ ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VLD4_UPD: {
- unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
- unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
- ARM::VLD4q16Pseudo_UPD,
- ARM::VLD4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
- ARM::VLD4q16oddPseudo_UPD,
- ARM::VLD4q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
+ ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VLD2LN_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
- ARM::VLD2LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
- ARM::VLD2LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
+ ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
}
case ARMISD::VLD3LN_UPD: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
- ARM::VLD3LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
- ARM::VLD3LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
+ ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
}
case ARMISD::VLD4LN_UPD: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
- ARM::VLD4LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
- ARM::VLD4LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
+ ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
}
case ARMISD::VST1_UPD: {
- unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
- ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
- unsigned QOpcodes[] = { ARM::VST1q8wb_fixed,
- ARM::VST1q16wb_fixed,
- ARM::VST1q32wb_fixed,
- ARM::VST1q64wb_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
+ ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed,
+ ARM::VST1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8wb_fixed,
- ARM::VST2d16wb_fixed,
- ARM::VST2d32wb_fixed,
- ARM::VST1q64wb_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
- ARM::VST2q16PseudoWB_fixed,
- ARM::VST2q32PseudoWB_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
- unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
- unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
- ARM::VST3q16Pseudo_UPD,
- ARM::VST3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
- ARM::VST3q16oddPseudo_UPD,
- ARM::VST3q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
+ ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD,
+ ARM::VST1d64TPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VST4_UPD: {
- unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
- unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
- ARM::VST4q16Pseudo_UPD,
- ARM::VST4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
- ARM::VST4q16oddPseudo_UPD,
- ARM::VST4q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
+ ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD,
+ ARM::VST1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VST2LN_UPD: {
- unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
- ARM::VST2LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
- ARM::VST2LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
+ ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
}
case ARMISD::VST3LN_UPD: {
- unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
- ARM::VST3LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
- ARM::VST3LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
+ ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
}
case ARMISD::VST4LN_UPD: {
- unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
- ARM::VST4LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
- ARM::VST4LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
+ ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
}
@@ -3179,124 +3188,144 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case Intrinsic::arm_neon_vld1: {
- unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
- ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64};
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
- ARM::VLD2q32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
- ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
- unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
- ARM::VLD3q16Pseudo_UPD,
- ARM::VLD3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
- ARM::VLD3q16oddPseudo,
- ARM::VLD3q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
+ ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
- ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
- unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
- ARM::VLD4q16Pseudo_UPD,
- ARM::VLD4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
- ARM::VLD4q16oddPseudo,
- ARM::VLD4q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
+ ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
- ARM::VLD2LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
+ ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
+ ARM::VLD2LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
- ARM::VLD3LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
+ ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
+ ARM::VLD3LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
- ARM::VLD4LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
+ ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
+ ARM::VLD4LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
- unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
- ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
- ARM::VST2q32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
+ static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
- ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
- unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
- ARM::VST3q16Pseudo_UPD,
- ARM::VST3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
- ARM::VST3q16oddPseudo,
- ARM::VST3q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
+ ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo,
+ ARM::VST1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
- ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
- unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
- ARM::VST4q16Pseudo_UPD,
- ARM::VST4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
- ARM::VST4q16oddPseudo,
- ARM::VST4q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
+ ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo,
+ ARM::VST1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
- unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
- ARM::VST2LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
+ ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
+ ARM::VST2LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
- unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
- ARM::VST3LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
+ ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
+ ARM::VST3LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
- unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
- ARM::VST4LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
+ ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
+ ARM::VST4LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index a103c94..c66618a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -52,6 +52,7 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
// This option should go away when tail calls fully work.
static cl::opt<bool>
@@ -89,76 +90,71 @@ static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
-void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
- EVT PromotedBitwiseVT) {
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
- EVT ElemTy = VT.getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
- }
- setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
- setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
- setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::AND, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::OR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
}
// Neon does not support vector divide/remainder operations.
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
}
-void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
- addRegisterClass(VT, ARM::DPRRegisterClass);
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
-void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
- addRegisterClass(VT, ARM::QPRRegisterClass);
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::QPRRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
@@ -431,14 +427,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
if (Subtarget->isThumb1Only())
- addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
- addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ addRegisterClass(MVT::i32, &ARM::GPRRegClass);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
- addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f32, &ARM::SPRRegClass);
if (!Subtarget->isFPOnlySP())
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -824,6 +820,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
+ // Prefer likely predicted branches to selects on out-of-order cores.
+ predictableSelectIsExpensive = Subtarget->isCortexA9();
+
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -849,7 +848,7 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
// the cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
@@ -859,15 +858,15 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 2;
break;
case MVT::v4i64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 4;
break;
case MVT::v8i64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 8;
break;
}
@@ -891,6 +890,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
@@ -1027,17 +1027,18 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// load / store 4 to 8 consecutive D registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
- return ARM::QQPRRegisterClass;
- else if (VT == MVT::v8i64)
- return ARM::QQQQPRRegisterClass;
+ return &ARM::QQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::QQQQPRRegClass;
}
return TargetLowering::getRegClassFor(VT);
}
// Create a fast isel object.
FastISel *
-ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return ARM::createFastISel(funcInfo);
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return ARM::createFastISel(funcInfo, libInfo);
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
@@ -1166,6 +1167,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::GHC:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -1286,14 +1289,20 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
-ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool doesNotRet = CLI.DoesNotReturn;
+ bool isVarArg = CLI.IsVarArg;
+
MachineFunction &MF = DAG.getMachineFunction();
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool IsSibCall = false;
@@ -1415,21 +1424,22 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CCInfo.clearFirstByValReg();
}
- unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
- SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
- StkPtrOff);
- SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
- SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
- MVT::i32);
- MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
- Flags.getByValAlign(),
- /*isVolatile=*/false,
- /*AlwaysInline=*/false,
- MachinePointerInfo(0),
- MachinePointerInfo(0)));
-
+ if (Flags.getByValSize() - 4*offset > 0) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
+
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
+ MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
+ Ops, array_lengthof(Ops)));
+ }
} else if (!IsSibCall) {
assert(VA.isMemLoc());
@@ -2095,12 +2105,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
@@ -2108,7 +2119,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG,
+ TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
DebugLoc dl = GA->getDebugLoc();
SDValue Offset;
@@ -2117,7 +2129,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
- if (GV->isDeclaration()) {
+ if (model == TLSModel::InitialExec) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
@@ -2142,6 +2154,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
false, false, false, 0);
} else {
// local exec model
+ assert(model == TLSModel::LocalExec);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
@@ -2162,12 +2175,18 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- // If the relocation model is PIC, use the "General Dynamic" TLS Model,
- // otherwise use the "Local Exec" TLS Model
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
- return LowerToTLSGeneralDynamicModel(GA, DAG);
- else
- return LowerToTLSExecModels(GA, DAG);
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic:
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModels(GA, DAG, model);
+ }
+ llvm_unreachable("bogus TLS model");
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
@@ -2457,9 +2476,9 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
+ RC = &ARM::tGPRRegClass;
else
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
@@ -2543,9 +2562,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
+ RC = &ARM::tGPRRegClass;
else
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
@@ -2627,14 +2646,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
- RC = ARM::SPRRegisterClass;
+ RC = &ARM::SPRRegClass;
else if (RegVT == MVT::f64)
- RC = ARM::DPRRegisterClass;
+ RC = &ARM::DPRRegClass;
else if (RegVT == MVT::v2f64)
- RC = ARM::QPRRegisterClass;
+ RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = (AFI->isThumb1OnlyFunction() ?
- ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ RC = AFI->isThumb1OnlyFunction() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
@@ -4249,6 +4269,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Record this extraction against the appropriate vector if possible...
SDValue SourceVec = V.getOperand(0);
+ // If the element number isn't a constant, we can't effectively
+ // analyze what's going on.
+ if (!isa<ConstantSDNode>(V.getOperand(1)))
+ return SDValue();
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
bool FoundSource = false;
for (unsigned j = 0; j < SourceVecs.size(); ++j) {
@@ -4791,7 +4815,9 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
- Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ // Element types smaller than 32 bits are not legal, so use i32 elements.
+ // The values are implicitly truncated so sext vs. zext doesn't matter.
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
@@ -5252,14 +5278,14 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned scratch =
- MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass);
+ unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
@@ -5362,8 +5388,8 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
@@ -5394,8 +5420,9 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5469,8 +5496,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
@@ -5504,8 +5531,9 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -5531,7 +5559,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
@@ -5586,9 +5614,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
@@ -5614,8 +5642,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
// thisMBB:
@@ -5722,8 +5751,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC =
- isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
@@ -5827,8 +5857,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC =
- Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
@@ -6176,14 +6207,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
if (Subtarget->isThumb2() &&
- !ARM::tGPRRegisterClass->contains(Reg) &&
- !ARM::hGPRRegisterClass->contains(Reg))
+ !ARM::tGPRRegClass.contains(Reg) &&
+ !ARM::hGPRRegClass.contains(Reg))
continue;
- else if (Subtarget->isThumb1Only() &&
- !ARM::tGPRRegisterClass->contains(Reg))
+ if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
continue;
- else if (!Subtarget->isThumb() &&
- !ARM::GPRRegisterClass->contains(Reg))
+ if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
continue;
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
@@ -6214,6 +6243,304 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
+MachineBasicBlock *ARMTargetLowering::
+EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // This pseudo instruction has 3 operands: dst, src, size
+ // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
+ // Otherwise, we will generate unrolled scalar copies.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned src = MI->getOperand(1).getReg();
+ unsigned SizeVal = MI->getOperand(2).getImm();
+ unsigned Align = MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned ldrOpc, strOpc, UnitSize = 0;
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC_Vec = 0;
+
+ if (Align & 1) {
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ UnitSize = 1;
+ } else if (Align & 2) {
+ ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
+ strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
+ UnitSize = 2;
+ } else {
+ // Check whether we can use NEON instructions.
+ if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if ((Align % 16 == 0) && SizeVal >= 16) {
+ ldrOpc = ARM::VLD1q32wb_fixed;
+ strOpc = ARM::VST1q32wb_fixed;
+ UnitSize = 16;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
+ }
+ else if ((Align % 8 == 0) && SizeVal >= 8) {
+ ldrOpc = ARM::VLD1d32wb_fixed;
+ strOpc = ARM::VST1d32wb_fixed;
+ UnitSize = 8;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
+ }
+ }
+ // Can't use NEON instructions.
+ if (UnitSize == 0) {
+ ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ UnitSize = 4;
+ }
+ }
+
+ unsigned BytesLeft = SizeVal % UnitSize;
+ unsigned LoopSize = SizeVal - BytesLeft;
+
+ if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
+ // Use LDR and STR to copy.
+ // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
+ // [destOut] = STR_POST(scratch, destIn, UnitSize)
+ unsigned srcIn = src;
+ unsigned destIn = dest;
+ for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(destIn).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(UnitSize));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ // Handle the leftover bytes with LDRB and STRB.
+ // [scratch, srcOut] = LDRB_POST(srcIn, 1)
+ // [destOut] = STRB_POST(scratch, destIn, 1)
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+ }
+
+ // Expand the pseudo op to a loop.
+ // thisMBB:
+ // ...
+ // movw varEnd, # --> with thumb2
+ // movt varEnd, #
+ // ldrcp varEnd, idx --> without thumb2
+ // fallthrough --> loopMBB
+ // loopMBB:
+ // PHI varPhi, varEnd, varLoop
+ // PHI srcPhi, src, srcLoop
+ // PHI destPhi, dst, destLoop
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+ // subs varLoop, varPhi, #UnitSize
+ // bne loopMBB
+ // fallthrough --> exitMBB
+ // exitMBB:
+ // epilogue to handle left-over bytes
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Load an immediate to varEnd.
+ unsigned varEnd = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ unsigned VReg1 = varEnd;
+ if ((LoopSize & 0xFFFF0000) != 0)
+ VReg1 = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(LoopSize & 0xFFFF));
+
+ if ((LoopSize & 0xFFFF0000) != 0)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
+ .addReg(VReg1)
+ .addImm(LoopSize >> 16));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ }
+ BB->addSuccessor(loopMBB);
+
+ // Generate the loop body:
+ // varPhi = PHI(varLoop, varEnd)
+ // srcPhi = PHI(srcLoop, src)
+ // destPhi = PHI(destLoop, dst)
+ MachineBasicBlock *entryBB = BB;
+ BB = loopMBB;
+ unsigned varLoop = MRI.createVirtualRegister(TRC);
+ unsigned varPhi = MRI.createVirtualRegister(TRC);
+ unsigned srcLoop = MRI.createVirtualRegister(TRC);
+ unsigned srcPhi = MRI.createVirtualRegister(TRC);
+ unsigned destLoop = MRI.createVirtualRegister(TRC);
+ unsigned destPhi = MRI.createVirtualRegister(TRC);
+
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
+ .addReg(varLoop).addMBB(loopMBB)
+ .addReg(varEnd).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
+ .addReg(srcLoop).addMBB(loopMBB)
+ .addReg(src).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
+ .addReg(destLoop).addMBB(loopMBB)
+ .addReg(dest).addMBB(entryBB);
+
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(destPhi).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addReg(0).addImm(UnitSize));
+ }
+
+ // Decrement loop variable by UnitSize.
+ MachineInstrBuilder MIB = BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ // loopMBB can loop back to loopMBB or fall through to exitMBB.
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // Add epilogue to handle BytesLeft.
+ BB = exitMBB;
+ MachineInstr *StartOfExit = exitMBB->begin();
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ unsigned srcIn = srcLoop;
+ unsigned destIn = destLoop;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -6517,10 +6844,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewMovDstReg = MRI.createVirtualRegister(
- isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
- unsigned NewRsbDstReg = MRI.createVirtualRegister(
- isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
@@ -6534,12 +6860,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// fall through to SinkMBB
RSBBB->addSuccessor(SinkBB);
- // insert a movs at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
- NewMovDstReg)
- .addReg(ABSSrcReg, RegState::Kill)
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::CPSR, RegState::Define);
+ // insert a cmp at the end of BB
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg).addImm(0));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -6551,7 +6875,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(NewMovDstReg, RegState::Kill)
+ .addReg(ABSSrcReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
// insert PHI in SinkBB,
@@ -6559,7 +6883,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
.addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(NewMovDstReg).addMBB(BB);
+ .addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
MI->eraseFromParent();
@@ -6567,6 +6891,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// return last added BB
return SinkBB;
}
+ case ARM::COPY_STRUCT_BYVAL_I32:
+ ++NumLoopByVals;
+ return EmitStructByval(MI, BB);
}
}
@@ -7095,8 +7422,12 @@ static SDValue PerformORCombine(SDNode *N,
return COR;
}
+
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND)
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);
@@ -7353,7 +7684,7 @@ static SDValue PerformSTORECombine(SDNode *N,
if (St->isVolatile())
return SDValue();
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
// pack all of the elements in one place. Next, store to memory in fewer
// chunks.
SDValue StVal = St->getValue();
@@ -8721,12 +9052,19 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return Imm >= 0 && Imm <= 255;
}
-/// isLegalAddImmediate - Return true if the specified immediate is legal
-/// add immediate, that is the target has add instructions which can add
-/// a register with the immediate without having to materialize the
+/// isLegalAddImmediate - Return true if the specified immediate is a legal add
+/// *or sub* immediate, that is the target has add or sub instructions which can
+/// add a register with the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
- return ARM_AM::getSOImmVal(Imm) != -1;
+ // Same encoding for add/sub, just flip the sign.
+ int64_t AbsImm = llvm::abs64(Imm);
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(AbsImm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(AbsImm) != -1;
+ // Thumb1 only has 8-bit unsigned immediate.
+ return AbsImm >= 0 && AbsImm <= 255;
}
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
@@ -9030,39 +9368,38 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
switch (Constraint[0]) {
case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::tGPRRegisterClass);
- else
- return RCPair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, &ARM::tGPRRegClass);
+ return RCPair(0U, &ARM::GPRRegClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::hGPRRegisterClass);
+ return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
- return RCPair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPRRegisterClass);
+ return RCPair(0U, &ARM::DPRRegClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPRRegisterClass);
+ return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPR_8RegisterClass);
+ return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPR_8RegisterClass);
+ return RCPair(0U, &ARM::DPR_8RegClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPR_8RegisterClass);
+ return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, &ARM::SPRRegClass);
break;
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+ return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 352d980..51d1205 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -41,6 +41,9 @@ namespace llvm {
// PIC mode.
WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+ // Add pseudo op to model memcpy for struct byval.
+ COPY_STRUCT_BYVAL,
+
CALL, // Function call.
CALL_PRED, // Function call that's predicable.
CALL_NOLINK, // Function call with branch not branch-and-link.
@@ -53,6 +56,7 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
CMP, // ARM compare instructions.
+ CMN, // ARM CMN instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
@@ -357,7 +361,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
Sched::Preference getSchedulingPreference(SDNode *N) const;
@@ -389,9 +394,9 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
- void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
- void addDRTypeForNEON(EVT VT);
- void addQRTypeForNEON(EVT VT);
+ void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
@@ -422,7 +427,8 @@ namespace llvm {
SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG,
+ TLSModel::Model model) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -462,13 +468,7 @@ namespace llvm {
unsigned &VARegSize, unsigned &VARegSaveSize) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
@@ -532,6 +532,9 @@ namespace llvm {
MachineBasicBlock *MBB) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
@@ -542,7 +545,8 @@ namespace llvm {
namespace ARM {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index f04926a..c8966fb 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -827,6 +827,8 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{7-4} = 0b0111;
let Inst{9-8} = 0b00;
let Inst{27-20} = opcod;
+
+ let Unpredictable{9-8} = 0b11;
}
// Misc Arithmetic instructions.
@@ -1862,7 +1864,6 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
-
// Instruction operands.
bits<5> Vd;
bits<5> Vn;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index b8f607e..31b0c41 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
if (hasNOP()) {
- NopInst.setOpcode(ARM::NOP);
+ NopInst.setOpcode(ARM::HINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
} else {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 1eb561d..6340a58 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -18,6 +18,9 @@
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+def SDT_ARMStructByVal : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
@@ -90,6 +93,10 @@ def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
+ SDT_ARMStructByVal,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -121,6 +128,9 @@ def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutGlue]>;
+def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
[SDNPOutGlue, SDNPCommutative]>;
@@ -161,53 +171,59 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
// ARM Instruction Predicate Definitions.
//
def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
- AssemblerPredicate<"HasV4TOps">;
+ AssemblerPredicate<"HasV4TOps", "armv4t">;
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
- AssemblerPredicate<"HasV5TEOps">;
+ AssemblerPredicate<"HasV5TEOps", "armv5te">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
- AssemblerPredicate<"HasV6Ops">;
+ AssemblerPredicate<"HasV6Ops", "armv6">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
- AssemblerPredicate<"HasV6T2Ops">;
+ AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
- AssemblerPredicate<"HasV7Ops">;
+ AssemblerPredicate<"HasV7Ops", "armv7">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
- AssemblerPredicate<"FeatureVFP2">;
+ AssemblerPredicate<"FeatureVFP2", "VFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
- AssemblerPredicate<"FeatureVFP3">;
+ AssemblerPredicate<"FeatureVFP3", "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
- AssemblerPredicate<"FeatureVFP4">;
+ AssemblerPredicate<"FeatureVFP4", "VFP4">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
- AssemblerPredicate<"FeatureNEON">;
+ AssemblerPredicate<"FeatureNEON", "NEON">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16">;
+ AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv">;
+ AssemblerPredicate<"FeatureHWDiv", "divide">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
- AssemblerPredicate<"FeatureT2XtPk">;
+ AssemblerPredicate<"FeatureT2XtPk",
+ "pack/extract">;
def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
- AssemblerPredicate<"FeatureDSPThumb2">;
+ AssemblerPredicate<"FeatureDSPThumb2",
+ "thumb2-dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
- AssemblerPredicate<"FeatureDB">;
+ AssemblerPredicate<"FeatureDB",
+ "data-barriers">;
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
- AssemblerPredicate<"FeatureMP">;
+ AssemblerPredicate<"FeatureMP",
+ "mp-extensions">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
- AssemblerPredicate<"ModeThumb">;
+ AssemblerPredicate<"ModeThumb", "thumb">;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
- AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+ AssemblerPredicate<"ModeThumb,FeatureThumb2",
+ "thumb2">;
def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass">;
+ AssemblerPredicate<"FeatureMClass", "armv7m">;
def IsARClass : Predicate<"!Subtarget->isMClass()">,
- AssemblerPredicate<"!FeatureMClass">;
+ AssemblerPredicate<"!FeatureMClass",
+ "armv7a/r">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
- AssemblerPredicate<"!ModeThumb">;
+ AssemblerPredicate<"!ModeThumb", "arm-mode">;
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -220,7 +236,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast) && "
"!Subtarget->isTargetDarwin()">;
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
@@ -236,9 +253,9 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
-// so_imm_neg def below.
-def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+// imm_neg_XFORM - Return a imm value packed into the format described for
+// imm_neg defs below.
+def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -257,7 +274,7 @@ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
int64_t Value = -(int)N->getZExtValue();
return Value && ARM_AM::getSOImmVal(Value) != -1;
- }], so_imm_neg_XFORM> {
+ }], imm_neg_XFORM> {
let ParserMatchClass = so_imm_neg_asmoperand;
}
@@ -399,8 +416,11 @@ def pclabel : Operand<i32> {
}
// ADR instruction labels.
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
def adrlabel : Operand<i32> {
let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrLabelAsmOperand;
+ let PrintMethod = "printAdrLabelOperand";
}
def neon_vcvt_imm32 : Operand<i32> {
@@ -570,7 +590,10 @@ def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
}
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
+def Imm0_15AsmOperand: ImmAsmOperand {
+ let Name = "Imm0_15";
+ let DiagnosticType = "ImmRange0_15";
+}
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
}]> {
@@ -615,6 +638,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_65535AsmOperand;
}
+// imm0_65535_neg - An immediate whose negative value is in the range [0.65535].
+def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
+ return -Imm >= 0 && -Imm < 65536;
+}]>;
+
// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
// a relocatable expression.
//
@@ -940,9 +968,10 @@ include "ARMInstrFormats.td"
/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
/// binop that produces a value.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1003,38 +1032,15 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let Inst{4} = 1;
let Inst{3-0} = shift{3-0};
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
-
}
/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
/// it is equivalent to the AsI1_bin_irs counterpart.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1094,30 +1100,6 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
let Inst{4} = 1;
let Inst{3-0} = shift{3-0};
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
-
}
/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
@@ -1304,8 +1286,9 @@ class AI_exta_rrot_np<bits<8> opcod, string opc>
}
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc, bit Commutable = 0> {
+ bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1351,7 +1334,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
+ [(set GPRnopc:$Rd, CPSR,
+ (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -1366,34 +1350,11 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{3-0} = shift{3-0};
}
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
}
/// AI1_rsc_irs - Define instructions and patterns for rsc
-multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc> {
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1450,29 +1411,6 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{3-0} = shift{3-0};
}
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1511,9 +1449,10 @@ multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
// Note: We use the complex addrmode_imm12 rather than just an input
// GPR and a constrained immediate so that we can use this to match
// frame index references and avoid matching constant pool references.
- def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr),
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt),
+ (ins addrmode_imm12:$addr),
AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
- [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
bits<4> Rt;
bits<17> addr;
let Inst{23} = addr{12}; // U (add = ('U' == 1))
@@ -1521,9 +1460,10 @@ multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm12
}
- def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift),
- AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
- [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt),
+ (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
bits<4> Rt;
bits<17> shift;
let shift{4} = 0; // Inst{4} = 0
@@ -1581,9 +1521,10 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm12
}
- def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift),
- AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
- [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ def rs : AI2ldst<0b011, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
bits<4> Rt;
bits<17> shift;
let shift{4} = 0; // Inst{4} = 0
@@ -1655,33 +1596,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
NoItinerary, []>;
}
-def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000000;
+def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+ "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
}
-def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000001;
-}
-
-def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000010;
-}
-
-def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000011;
-}
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1694,16 +1620,10 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
let Inst{27-20} = 0b01101000;
let Inst{7-4} = 0b1011;
let Inst{11-8} = 0b1111;
+ let Unpredictable{11-8} = 0b1111;
}
-def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
- []>, Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000100;
-}
-
-// The i32imm operand $val can be used by a debugger to store more information
+// The 16-bit operand $val can be used by a debugger to store more information
// about the breakpoint.
def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
"bkpt", "\t$val", []>, Requires<[IsARM]> {
@@ -1922,7 +1842,7 @@ let isCall = 1,
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
Defs = [LR], Uses = [SP] in {
- def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsARM]> {
@@ -1932,7 +1852,7 @@ let isCall = 1,
let DecoderMethod = "DecodeBranchImmInstruction";
}
- def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM]> {
@@ -1942,7 +1862,7 @@ let isCall = 1,
}
// ARMv5T and above
- def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
Requires<[IsARM, HasV5T]> {
@@ -1951,7 +1871,7 @@ let isCall = 1,
let Inst{3-0} = func;
}
- def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
Requires<[IsARM, HasV5T]> {
@@ -1962,19 +1882,18 @@ let isCall = 1,
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, HasV4T]>;
// ARMv4
- def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, NoV4T]>;
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
- def BMOVPCB_CALL : ARMPseudoInst<(outs),
- (ins bl_target:$func, variable_ops),
+ def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
Requires<[IsARM]>;
}
@@ -2044,18 +1963,16 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>;
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>;
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>;
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
4, IIC_Br, [],
(Bcc br_target:$dst, (ops 14, zero_reg))>,
Requires<[IsARM]>;
- def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
(BX GPR:$dst)>,
Requires<[IsARM]>;
@@ -2509,6 +2426,7 @@ multiclass AI2_stridx<bit isByte, string opc,
let Inst{23} = offset{12};
let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
@@ -2768,7 +2686,7 @@ defm STRHT : AI3strT<0b1011, "strht">;
multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
- // mnemonic here. Without it is the cannonical spelling.
+ // mnemonic here. Without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
@@ -2900,9 +2818,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
- (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
-
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -3113,10 +3028,10 @@ def UBFX : I<(outs GPR:$Rd),
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
//
@@ -3134,15 +3049,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
- "ADC", 1>;
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "SBC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
-defm RSB : AsI1_rbin_irs <0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
+defm RSB : AsI1_rbin_irs<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
@@ -3150,8 +3063,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "RSC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -3163,6 +3075,11 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
(SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
@@ -3190,7 +3107,7 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
-
+
let Unpredictable{11-8} = 0b1111;
}
@@ -3355,16 +3272,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
defm AND : AsI1_bin_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
// like in the actual instruction encoding. The complexity of mapping the mask
@@ -3482,27 +3399,28 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// FIXME: The v5 pseudos are only necessary for the additional Constraint
// property. Remove them when it's possible to add those properties
-// on an individual MachineInstr, not just an instuction description.
-let isCommutable = 1 in {
-def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
- [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+// on an individual MachineInstr, not just an instruction description.
+let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b0000;
let Unpredictable{15-12} = 0b1111;
}
let Constraints = "@earlyclobber $Rd" in
def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
- pred:$p, cc_out:$s),
- 4, IIC_iMUL32,
- [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
- (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
Requires<[IsARM, HasV6]> {
bits<4> Ra;
@@ -3511,8 +3429,8 @@ def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
let Constraints = "@earlyclobber $Rd" in
def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
- 4, IIC_iMAC32,
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
(MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
@@ -3630,8 +3548,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
@@ -3912,49 +3829,85 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
(CMPrsr GPR:$src, so_reg_reg:$rhs)>;
-// FIXME: We have to be careful when using the CMN instruction and comparison
-// with 0. One would expect these two pieces of code should give identical
-// results:
-//
-// rsbs r1, r1, 0
-// cmp r0, r1
-// mov r0, #0
-// it ls
-// mov r0, #1
-//
-// and:
-//
-// cmn r0, r1
-// mov r0, #0
-// it ls
-// mov r0, #1
-//
-// However, the CMN gives the *opposite* result when r1 is 0. This is because
-// the carry flag is set in the CMP case but not in the CMN case. In short, the
-// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
-// value of r0 and the carry bit (because the "carry bit" parameter to
-// AddWithCarry is defined as 1 in this case, the carry flag will always be set
-// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
-// never a "carry" when this AddWithCarry is performed (because the "carry bit"
-// parameter to AddWithCarry is defined as 0).
-//
-// When x is 0 and unsigned:
-//
-// x = 0
-// ~x = 0xFFFF FFFF
-// ~x + 1 = 0x1 0000 0000
-// (-x = 0) != (0x1 0000 0000 = ~x + 1)
-//
-// Therefore, we should disable CMN when comparing against zero, until we can
-// limit when the CMN instruction is used (when we know that the RHS is not 0 or
-// when it's a comparison which doesn't look at the 'carry' flag).
-//
-// (See the ARM docs for the "AddWithCarry" pseudo-code.)
-//
-// This is related to <rdar://problem/7569620>.
-//
-//defm CMN : AI1_cmp_irs<0b1011, "cmn",
-// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+// CMN register-integer
+let isCompare = 1, Defs = [CPSR] in {
+def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
+ "cmn", "\t$Rn, $imm",
+ [(ARMcmn GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+// CMN register-register/shift
+def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = 1;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsi : AI1<0b1011, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, so_reg_imm:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsr : AI1<0b1011, (outs),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+}
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
@@ -3964,16 +3917,6 @@ defm TEQ : AI1_cmp_irs<0b1001, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
-defm CMNz : AI1_cmp_irs<0b1011, "cmn",
- IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
- BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
-
-//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
-// (CMNri GPR:$src, so_imm_neg:$imm)>;
-
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
- (CMNzri GPR:$src, so_imm_neg:$imm)>;
-
// Pseudo i64 compares for some floating point compares.
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
@@ -4121,11 +4064,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in {
-def ABS : ARMPseudoInst<
- (outs GPR:$dst), (ins GPR:$src),
- 8, NoItinerary, []>;
-}
+let usesCustomInserter = 1, Defs = [CPSR] in
+def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
let usesCustomInserter = 1 in {
let Defs = [CPSR] in {
@@ -4242,6 +4182,13 @@ let usesCustomInserter = 1 in {
}
}
+let usesCustomInserter = 1 in {
+ def COPY_STRUCT_BYVAL_I32 : PseudoInst<
+ (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
+ NoItinerary,
+ [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
+}
+
let mayLoad = 1 in {
def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary,
@@ -4280,10 +4227,10 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
// SWP/SWPB are deprecated in V6/V7.
let mayLoad = 1, mayStore = 1 in {
-def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr),
- "swp", []>;
-def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr),
- "swpb", []>;
+def SWP : AIswp<0, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+def SWPB: AIswp<1, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
}
//===----------------------------------------------------------------------===//
@@ -4609,8 +4556,8 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
}
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
- [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2,
- imm:$CRm)]>;
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
@@ -4637,8 +4584,8 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
}
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
- [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2,
- imm:$CRm)]>;
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
//===----------------------------------------------------------------------===//
@@ -4658,7 +4605,8 @@ def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
let Unpredictable{11-0} = 0b110100001111;
}
-def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, Requires<[IsARM]>;
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>,
+ Requires<[IsARM]>;
// The MRSsys instruction is the MRS instruction from the ARM ARM,
// section B9.3.9, with the R bit set to 1.
@@ -5114,7 +5062,7 @@ def : ARMInstAlias<"add${s}${p} $Rd, $imm",
(SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via so_imm_neg
def : ARMInstAlias<"cmp${p} $Rd, $imm",
- (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+ (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
def : ARMInstAlias<"cmn${p} $Rd, $imm",
(CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
@@ -5123,6 +5071,7 @@ def : ARMInstAlias<"cmn${p} $Rd, $imm",
// FIXME: We need C++ parser hooks to map the alias to the MOV
// encoding. It seems we should be able to do that sort of thing
// in tblgen, but it could get ugly.
+let TwoOperandAliasConstraint = "$Rm = $Rd" in {
def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
(ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
cc_out:$s)>;
@@ -5135,8 +5084,10 @@ def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
(ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
cc_out:$s)>;
+}
def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+let TwoOperandAliasConstraint = "$Rn = $Rd" in {
def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
cc_out:$s)>;
@@ -5149,32 +5100,7 @@ def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
cc_out:$s)>;
-// shifter instructions also support a two-operand form.
-def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
- (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
- (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
- (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
- (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
- (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
- (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
- (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
- (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-
-
-// 'mul' instruction can be specified with only two operands.
-def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
- (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+}
// "neg" is and alias for "rsb rd, rn, #0"
def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index fd8ac0b..3134088 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1962,7 +1962,7 @@ def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
let Inst{4} = Rn{5};
}
-def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
@@ -2300,14 +2300,14 @@ class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -2325,7 +2325,7 @@ class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
@@ -2343,7 +2343,7 @@ class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
@@ -2368,6 +2368,8 @@ class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
// Same as N3VD but no data type.
@@ -2379,6 +2381,8 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
@@ -2391,6 +2395,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
@@ -2401,6 +2407,8 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
@@ -2411,6 +2419,8 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2420,6 +2430,8 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
@@ -2432,6 +2444,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
@@ -2443,21 +2457,25 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2468,7 +2486,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2479,26 +2497,29 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
let isCommutable = 0;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2510,7 +2531,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2522,11 +2543,12 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
let isCommutable = 0;
}
@@ -2606,7 +2628,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2614,7 +2636,7 @@ class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2625,7 +2647,7 @@ class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2633,7 +2655,7 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2678,7 +2700,7 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
@@ -2691,7 +2713,7 @@ class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2699,7 +2721,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
@@ -2712,7 +2734,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -2727,7 +2749,7 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
- Intrinsic IntOp, bit Commutable>
+ SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
@@ -2780,7 +2802,7 @@ class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Long 3-register intrinsics with explicit extend (VABDL).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
@@ -2793,7 +2815,7 @@ class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
@@ -2802,7 +2824,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2812,7 +2834,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2830,6 +2852,8 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
(TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
@@ -2837,14 +2861,14 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -2855,7 +2879,7 @@ class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
@@ -2863,7 +2887,7 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
@@ -2871,6 +2895,7 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
// Shift by immediate,
// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, InstrItinClass itin, Operand ImmTy,
string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
@@ -2885,6 +2910,7 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
(outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+}
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
@@ -2908,6 +2934,7 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// Shift right by immediate and accumulate,
// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
@@ -2924,9 +2951,11 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+}
// Shift by immediate and insert,
// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, Format f, string OpcodeStr, string Dt,
ValueType Ty,SDNode ShOp>
@@ -2941,19 +2970,20 @@ class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
(ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+}
// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
@@ -3023,7 +3053,7 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
@@ -3064,7 +3094,7 @@ multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp> {
+ SDPatternOperator IntOp> {
def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp>;
@@ -3152,7 +3182,7 @@ multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, bit Commutable = 0> {
// 64-bit vector types.
def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
@@ -3173,7 +3203,7 @@ multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp> {
+ SDPatternOperator IntOp> {
// 64-bit vector types.
def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
@@ -3194,7 +3224,7 @@ multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
@@ -3210,7 +3240,7 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0>
+ SDPatternOperator IntOp, bit Commutable = 0>
: N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
@@ -3224,7 +3254,7 @@ multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp> {
def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
@@ -3241,7 +3271,7 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0>
+ SDPatternOperator IntOp, bit Commutable = 0>
: N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
@@ -3255,7 +3285,7 @@ multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp> {
def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
@@ -3270,7 +3300,7 @@ multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, bit Commutable = 0> {
def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp, Commutable>;
@@ -3330,7 +3360,7 @@ multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, bit Commutable = 0> {
def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
@@ -3341,7 +3371,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp> {
+ SDPatternOperator IntOp> {
def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
@@ -3352,7 +3382,7 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0>
+ SDPatternOperator IntOp, bit Commutable = 0>
: N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
IntOp, Commutable> {
def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
@@ -3363,7 +3393,7 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, ExtOp, Commutable>;
@@ -3436,7 +3466,7 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp,
SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
@@ -3459,7 +3489,7 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
@@ -3506,7 +3536,7 @@ multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
@@ -3514,7 +3544,7 @@ multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
@@ -3524,7 +3554,7 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
- string OpcodeStr, string Dt, Intrinsic IntOp>
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
: N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
@@ -3533,7 +3563,7 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
IntOp, ExtOp, OpNode>;
@@ -3550,7 +3580,7 @@ multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
@@ -3573,7 +3603,7 @@ multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
@@ -3668,33 +3698,6 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
-
- // Aliases for two-operand forms (source and dest regs the same).
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8"))
- DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16"))
- DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v2i32"))
- DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v1i64"))
- DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8"))
- QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16"))
- QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32"))
- QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v2i64"))
- QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
}
// Neon Shift-Accumulate vector operations,
@@ -4133,16 +4136,16 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
Requires<[HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
(VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
(VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
@@ -4305,6 +4308,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
// VBIC : Vector Bitwise Bit Clear (AND NOT)
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
"vbic", "$Vd, $Vn, $Vm", "",
@@ -4315,6 +4319,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
"vbic", "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (v4i32 (and QPR:$Vn,
(vnotq QPR:$Vm))))]>;
+}
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
(outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
@@ -4820,14 +4825,14 @@ defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
- int_arm_neon_vclz>;
+ ctlz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiD, "vcnt", "8",
- v8i8, v8i8, int_arm_neon_vcnt>;
+ v8i8, v8i8, ctpop>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
- v16i8, v16i8, int_arm_neon_vcnt>;
+ v16i8, v16i8, ctpop>;
// Vector Swap
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
@@ -5308,6 +5313,9 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
+
+// All of these have a two-operand InstAlias.
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
@@ -5327,6 +5335,7 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
bits<4> index;
let Inst{11-8} = index{3-0};
}
+}
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
@@ -5588,82 +5597,87 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
-// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
-// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+// Lengthen_Single<"8", "i16", "8"> =
+// Pat<(v8i16 (extloadvi8 addrmode6:$addr))
+// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ let AddedComplexity = 10 in {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ }
}
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
-// Pat<(v4i16 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
+// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
string InsnLanes, string InsnTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
}
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
-// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
-// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv4i32
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)),
+// (i32 0))),
// dsub_0)),
-// qsub_0)>;
+// dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
}
// extload, zextload and sextload for a lengthening load followed by another
@@ -5671,45 +5685,43 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
-// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv4i32
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
-// dsub_0)),
-// dsub_0)>;
+// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)),
+// dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
}
-defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
-defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
-defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
-defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
// Double lengthening - v4i8 -> v4i16 -> v4i32
@@ -5720,18 +5732,18 @@ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
//===----------------------------------------------------------------------===//
// Assembler aliases
@@ -5742,69 +5754,6 @@ def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
(VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
-
-// VADD two-operand aliases.
-def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
- (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
- (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
- (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
- (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
- (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
- (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
- (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
- (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
- (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
- (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VSUB two-operand aliases.
-def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
- (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
- (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
- (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
- (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
- (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
- (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
- (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
- (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
- (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
- (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VADDW two-operand aliases.
-def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
- (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
- (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
- (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
- (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
- (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
- (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-
// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
@@ -5823,23 +5772,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
-def : NEONInstAlias<"vand${p} $Vdn, $Vm",
- (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vand${p} $Vdn, $Vm",
- (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
- (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
- (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"veor${p} $Vdn, $Vm",
- (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"veor${p} $Vdn, $Vm",
- (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
- (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
- (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
(VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
@@ -5853,212 +5785,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-// VMUL two-operand aliases.
-def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
- (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
- (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
- (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
- (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
- (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
- (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
- (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
- (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
- (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
- (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
- (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
- VectorIndex16:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
- (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
- VectorIndex16:$lane, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
- (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
- (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
- (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
- (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-
-// VQADD (register) two-operand aliases.
-def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
- (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
- (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
- (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
- (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
- (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
- (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
- (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
- (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
- (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
- (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
- (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
- (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
- (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
- (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
- (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
- (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VSHL (immediate) two-operand aliases.
-def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
- (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
- (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
- (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
- (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
- (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
- (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
- (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
- (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
-
-// VSHL (register) two-operand aliases.
-def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
- (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
- (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
- (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
- (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
- (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
- (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
- (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
- (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
- (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
- (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
- (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
- (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
- (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
- (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
- (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
- (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VSHR (immediate) two-operand aliases.
-def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
- (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
- (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
- (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
- (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
- (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
- (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
- (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
- (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
- (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
- (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
- (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
- (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
- (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
- (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
- (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
- (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-// VRSHL two-operand aliases.
-def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm",
- (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm",
- (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm",
- (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm",
- (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm",
- (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm",
- (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm",
- (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm",
- (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm",
- (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm",
- (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm",
- (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm",
- (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm",
- (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm",
- (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm",
- (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm",
- (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
@@ -6223,17 +5949,17 @@ def VST2LNqWB_register_Asm_32 :
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
-def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_8 :
@@ -6499,17 +6225,17 @@ def VST3qWB_register_Asm_32 :
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
-def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_8 :
@@ -6845,277 +6571,6 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
(VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
-// Two-operand variants for VEXT
-def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
- (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
- (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
- (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
- (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
- (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
- (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
- (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
-
-// Two-operand variants for VQDMULH
-def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
- (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
- (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
- (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
- (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VMAX.
-def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
- (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
- (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
- (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
- (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
- (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
- (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
- (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
- (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
- (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
- (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
- (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
- (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
- (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
- (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VMIN.
-def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
- (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
- (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
- (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
- (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
- (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
- (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
- (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
- (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
- (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
- (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
- (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
- (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
- (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
- (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VPADD.
-def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
- (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
- (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
- (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
- (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VSRA.
- // Signed.
-def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
- (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
- (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
- (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
- (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
- (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
- (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
- (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
- (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
- (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
- (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
- (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
- (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
- (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
- (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
- (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
- (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-// Two-operand variants for VSRI.
-def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
- (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
- (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
- (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
- (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
- (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
- (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
- (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
- (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-// Two-operand variants for VSLI.
-def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
- (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
- (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
- (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
- (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
- (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
- (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
- (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
- (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-// Two-operand variants for VHSUB.
- // Signed.
-def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm",
- (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm",
- (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm",
- (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm",
- (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm",
- (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm",
- (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm",
- (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm",
- (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm",
- (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm",
- (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm",
- (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm",
- (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-
-// Two-operand variants for VHADD.
- // Signed.
-def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm",
- (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm",
- (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm",
- (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm",
- (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm",
- (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm",
- (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm",
- (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm",
- (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm",
- (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm",
- (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm",
- (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm",
- (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VRHADD.
- // Signed.
-def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm",
- (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm",
- (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm",
- (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-
-def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm",
- (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm",
- (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm",
- (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm",
- (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm",
- (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm",
- (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-
-def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm",
- (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm",
- (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm",
- (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
(VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 6335229..554f6d9 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -32,9 +32,6 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = ThumbSRImmAsmOperand;
}
-def imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
-}]>;
def imm_comp_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
}]>;
@@ -258,16 +255,20 @@ def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
Requires<[IsThumb2]>;
def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
- T1SystemEncoding<0x10>; // A8.6.410
+ T1SystemEncoding<0x10>, // A8.6.410
+ Requires<[IsThumb2]>;
def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
- T1SystemEncoding<0x20>; // A8.6.408
+ T1SystemEncoding<0x20>, // A8.6.408
+ Requires<[IsThumb2]>;
def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
- T1SystemEncoding<0x30>; // A8.6.409
+ T1SystemEncoding<0x30>, // A8.6.409
+ Requires<[IsThumb2]>;
def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
- T1SystemEncoding<0x40>; // A8.6.157
+ T1SystemEncoding<0x40>, // A8.6.157
+ Requires<[IsThumb2]>;
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -363,8 +364,8 @@ def : tInstAlias<"sub${p} sp, sp, $imm",
(tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
// ADD <Rm>, sp
-def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr,
- "add", "\t$Rdn, $sp, $Rn", []>,
+def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T1
bits<4> Rdn;
@@ -419,34 +420,35 @@ let isCall = 1,
Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
+ (outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb]> {
- bits<22> func;
- let Inst{26} = func{21};
+ bits<24> func;
+ let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
let Inst{10-0} = func{10-0};
}
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
+ (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T]> {
- bits<21> func;
+ bits<24> func;
+ let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
let Inst{10-1} = func{10-1};
let Inst{0} = 0; // func{0} is assumed zero
}
// Also used for Thumb2
- def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T]>,
@@ -457,7 +459,7 @@ let isCall = 1,
}
// ARMv4T
- def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb, IsThumb1Only]>;
@@ -504,7 +506,7 @@ let isBranch = 1, isTerminator = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
let Uses = [SP] in {
- def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
Requires<[IsThumb]>;
@@ -514,7 +516,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Non-IOS version:
let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
- (ins t_brtarget:$dst, pred:$p, variable_ops),
+ (ins t_brtarget:$dst, pred:$p),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
Requires<[IsThumb, IsNotIOS]>;
@@ -1398,7 +1400,7 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// For round-trip assembly/disassembly, we have to handle a CPS instruction
// without any iflags. That's not, strictly speaking, valid syntax, but it's
-// a useful extention and assembles to defined behaviour (the insn does
+// a useful extension and assembles to defined behaviour (the insn does
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e6fb9d5..307006f 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -62,6 +62,15 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
}]>;
+// so_imm_notSext_XFORM - Return a so_imm value packed into the format
+// described for so_imm_notSext def below, with sign extension from 16
+// bits.
+def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
+ APInt apIntN = N->getAPIntValue();
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32);
+}]>;
+
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
@@ -86,6 +95,17 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
+// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm
+// if the upper 16 bits are zero.
+def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
+ APInt apIntN = N->getAPIntValue();
+ if (!apIntN.isIntN(16)) return false;
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1;
+ }], t2_so_imm_notSext16_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
@@ -152,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> {
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
let EncoderMethod = "getT2AdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand";
}
@@ -509,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0,
+ PatFrag opnode, bit Commutable = 0,
string wide = ""> {
// shifted imm
def ri : T2sTwoRegImm<
@@ -545,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn,
t2_so_reg:$shift, pred:$p,
cc_out:$s)>;
}
@@ -562,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// the ".w" suffix to indicate that they are wide.
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
// Assembler aliases w/ the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
}
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
@@ -668,16 +683,16 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass T2I_rbin_s_is<PatFrag opnode> {
// shifted imm
def ri : t2PseudoInst<(outs rGPR:$Rd),
- (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
4, IIC_iALUi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
- GPRnopc:$Rn))]>;
+ rGPR:$Rn))]>;
// shifted register
def rs : t2PseudoInst<(outs rGPR:$Rd),
- (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
4, IIC_iALUsi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
- GPRnopc:$Rn))]>;
+ rGPR:$Rn))]>;
}
}
@@ -788,8 +803,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
- string baseOpc> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
@@ -814,33 +828,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
// Optional destination register
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
}
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
@@ -848,7 +856,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
/// a explicit result, only implicitly set CPSR.
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc> {
+ PatFrag opnode> {
let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
@@ -893,12 +901,9 @@ let isCompare = 1, Defs = [CPSR] in {
// No alias here for 'rr' version as not all instantiations of this
// multiclass want one (CMP in particular, does not).
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
- t2_so_imm:$imm, pred:$p)>;
+ (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
- t2_so_reg:$shift,
- pred:$p)>;
+ (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
@@ -1911,11 +1916,16 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
+ (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
let AddedComplexity = 1 in
def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
(t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
+ (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
@@ -1924,6 +1934,8 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
// Select Bytes -- for disassembly only
@@ -2125,17 +2137,17 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
//
defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
- BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+ BinOpFrag<(shl node:$LHS, node:$RHS)>>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
- BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+ BinOpFrag<(srl node:$LHS, node:$RHS)>>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
- BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+ BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
- BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
-def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
- (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
@@ -2187,18 +2199,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
defm t2AND : T2I_bin_w_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>,
- "t2BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
class T2BitFI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -2278,8 +2289,7 @@ let Constraints = "$src = $Rd" in {
defm t2ORN : T2I_bin_irs<0b0011, "orn",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, (not node:$RHS))>,
- "t2ORN", 0, "">;
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// unary operation that produces a value. These are predicable and can be
@@ -2332,6 +2342,17 @@ let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 rGPR:$src), [{
+ return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// so_imm_notSext is needed instead of so_imm_not, as the value of imm
+// will match the extended, not the original bitWidth for $src.
+def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
+
+
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
(t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
@@ -2840,7 +2861,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
(t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
@@ -2849,29 +2870,68 @@ def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
(t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
-//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
-// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
-defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
- IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>,
- "t2CMNz">;
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def t2CMNri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
+ "cmn", ".w\t$Rn, $imm",
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def t2CMNzrr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
+ "cmn", ".w\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, rGPR:$Rm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def t2CMNzrs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
+ "cmn", ".w\t$Rn, $ShiftedRm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
-//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
-// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+// Assembler aliases w/o the ".w" suffix.
+// No alias here for 'rr' version as not all instantiations of this multiclass
+// want one (CMP in particular, does not).
+def : t2InstAlias<"cmn${p} $Rn, $imm",
+ (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $shift",
+ (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
-def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
- (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>;
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
- "t2TST">;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
- "t2TEQ">;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
@@ -3017,7 +3077,7 @@ def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
"isb", "\t$opt",
- []>, Requires<[IsThumb2, HasDB]> {
+ []>, Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
let Inst{3-0} = opt;
@@ -3271,7 +3331,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
- (ins uncondbrtarget:$dst, pred:$p, variable_ops),
+ (ins uncondbrtarget:$dst, pred:$p),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
Requires<[IsThumb2, IsIOS]>;
@@ -3281,7 +3341,7 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
def t2BMOVPCB_CALL : tPseudoInst<(outs),
- (ins t_bltarget:$func, variable_ops),
+ (ins t_bltarget:$func),
6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
Requires<[IsThumb]>;
}
@@ -3382,21 +3442,18 @@ let imod = 0, iflags = 0, M = 1 in
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-class T2I_hint<bits<8> op7_0, string opc, string asm>
- : T2I<(outs), (ins), NoItinerary, opc, asm, []> {
- let Inst{31-20} = 0xf3a;
- let Inst{19-16} = 0b1111;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
- let Inst{10-8} = 0b000;
- let Inst{7-0} = op7_0;
+def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
+ bits<8> imm;
+ let Inst{31-8} = 0b111100111010111110000000;
+ let Inst{7-0} = imm;
}
-def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
-def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
-def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
-def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
-def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
+def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
+def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
+def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
+def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
bits<4> opt;
@@ -3622,8 +3679,8 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
// A/R class MRS.
//
// A/R class can only move from CPSR or SPSR.
-def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>,
- Requires<[IsThumb2,IsARClass]> {
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
+ []>, Requires<[IsThumb2,IsARClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
@@ -3632,8 +3689,8 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
-def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>,
- Requires<[IsThumb2,IsARClass]> {
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ []>, Requires<[IsThumb2,IsARClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
@@ -3646,7 +3703,7 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", [
// the A/R class (a full msr_mask).
def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
"mrs", "\t$Rd, $mask", []>,
- Requires<[IsThumb2,IsMClass]> {
+ Requires<[IsThumb,IsMClass]> {
bits<4> Rd;
bits<8> mask;
let Inst{31-12} = 0b11110011111011111000;
@@ -3682,14 +3739,14 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
// Move from ARM core register to Special Register
def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
NoItinerary, "msr", "\t$SYSm, $Rn", []>,
- Requires<[IsThumb2,IsMClass]> {
- bits<8> SYSm;
+ Requires<[IsThumb,IsMClass]> {
+ bits<12> SYSm;
bits<4> Rn;
let Inst{31-21} = 0b11110011100;
let Inst{20} = 0b0;
let Inst{19-16} = Rn;
let Inst{15-12} = 0b1000;
- let Inst{7-0} = SYSm;
+ let Inst{11-0} = SYSm;
}
@@ -3969,6 +4026,17 @@ def : t2InstAlias<"add${s}${p} $Rdn, $imm",
def : t2InstAlias<"add${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
// Aliases for SUB without the ".w" optional width specifier.
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
@@ -4002,9 +4070,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
-def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
@@ -4213,7 +4281,7 @@ def : t2InstAlias<"add${s}${p} $Rd, $imm",
pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via t2_so_imm_neg
def : t2InstAlias<"cmp${p} $Rd, $imm",
- (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+ (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
def : t2InstAlias<"cmn${p} $Rd, $imm",
(t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 3600b88..23c132e 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -221,11 +221,13 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
// FP Binary Operations.
//
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
@@ -235,11 +237,13 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
@@ -249,21 +253,25 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
@@ -559,8 +567,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
bits<4> Rt2;
// Encode instruction operands.
- let Inst{3-0} = src1{3-0};
- let Inst{5} = src1{4};
+ let Inst{3-0} = src1{4-1};
+ let Inst{5} = src1{0};
let Inst{15-12} = Rt;
let Inst{19-16} = Rt2;
@@ -609,8 +617,8 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
bits<4> src2;
// Encode instruction operands.
- let Inst{3-0} = dst1{3-0};
- let Inst{5} = dst1{4};
+ let Inst{3-0} = dst1{4-1};
+ let Inst{5} = dst1{0};
let Inst{15-12} = src1;
let Inst{19-16} = src2;
@@ -819,9 +827,9 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
// Single Precision register
-class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
- dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
@@ -830,9 +838,9 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bi
}
// Double Precision register
-class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
- dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
@@ -1081,10 +1089,11 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
+// (fma x, y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1115,18 +1124,18 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-// (fma (fneg x), y, z) -> (vfms x, y, z)
-def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
+// (fma (fneg x), y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-// (fneg (fma x, (fneg y), z) -> (vfms x, y, z)
-def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
+// (fma x, (fneg y), z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
+def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1157,18 +1166,18 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-// (fneg (fma x, y, z)) -> (vfnma x, y, z)
-def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
+// (fneg (fma x, y, z)) -> (vfnma z, x, y)
+def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
+def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
-def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
+// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1198,18 +1207,26 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z)
-def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
+
+// (fma x, y, (fneg z)) -> (vfnms z, x, y))
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-// (fma x, (fneg y), z) -> (vnfms x, y, z)
-def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
+// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
+def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1426,22 +1443,6 @@ def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
(VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
-// VMUL has a two-operand form (implied destination operand)
-def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
- (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
-def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
- (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-// VADD has a two-operand form (implied destination operand)
-def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
- (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
-def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
- (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-// VSUB has a two-operand form (implied destination operand)
-def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
- (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
-def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
- (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-
// VMOV can accept optional 32-bit or less data type suffix suffix.
def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
(VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 98930cc..3f99cce 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -289,9 +289,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
ResultPtr = ResultPtr >> 2;
*((intptr_t*)RelocPos) |= ResultPtr;
- // Set register Rn to PC.
- *((intptr_t*)RelocPos) |=
- getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ // Set register Rn to PC (which is register 15 on all architectures).
+ // FIXME: This avoids the need for register info in the JIT class.
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_pic_jt:
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 9ef2ace..897ceb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
DebugLoc dl = Loc->getDebugLoc();
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(PReg);
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
@@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
int NewOffset = MemOps[i].Offset;
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(Reg);
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
// Register numbers must be in ascending order. For VFP / NEON load and
// store multiples, the registers must also be consecutive and within the
// limit on the number of registers per instruction.
@@ -1177,8 +1175,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
- if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
- NewOpc = ARM::t2LDRi12;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1326,7 +1322,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
// Find a scratch register.
- unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
// Process the load / store instructions.
RS->forward(prior(MBBI));
@@ -1739,7 +1735,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
Ops.pop_back();
const MCInstrDesc &MCID = TII->get(NewOpc);
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
MRI->constrainRegClass(EvenReg, TRC);
MRI->constrainRegClass(OddReg, TRC);
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 1466e98..6f974fd 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
- field bits<4> Num;
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
let SubRegs = subregs;
// All bits of ARM registers with sub-registers are covered by sub-registers.
let CoveredBySubRegs = 1;
}
-class ARMFReg<bits<6> num, string n> : Register<n> {
- field bits<6> Num;
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
}
@@ -267,21 +267,16 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- (trunc DPR, 16)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1)];
-}
+ (trunc DPR, 16)>;
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- (trunc DPR, 8)> {
- let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
-}
+ (trunc DPR, 8)>;
// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
(sequence "Q%u", 0, 15)> {
- let SubRegClasses = [(DPR dsub_0, dsub_1)];
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8)];
let AltOrderSelect = [{ return 1; }];
@@ -289,17 +284,11 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
// Subset of QPR that have 32-bit SPR subregs.
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (trunc QPR, 8)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_VFP2 dsub_0, dsub_1)];
-}
+ 128, (trunc QPR, 8)>;
// Subset of QPR that have DPR_8 and SPR_8 subregs.
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (trunc QPR, 4)> {
- let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_8 dsub_0, dsub_1)];
-}
+ 128, (trunc QPR, 4)>;
// Pseudo-registers representing odd-even pairs of D registers. The even-odd
// pairs are already represented by the Q registers.
@@ -338,8 +327,6 @@ def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
// Pseudo 256-bit vector register class to model pairs of Q registers
// (4 consecutive D registers).
def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
- let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
- (QPR qsub_0, qsub_1)];
// Allocate non-VFP2 aliases first.
let AltOrders = [(rotl QQPR, 8)];
let AltOrderSelect = [{ return 1; }];
@@ -363,9 +350,6 @@ def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
// (8 consecutive D registers).
def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
- let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
- dsub_4, dsub_5, dsub_6, dsub_7),
- (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
// Allocate non-VFP2 aliases first.
let AltOrders = [(rotl QQQQPR, 8)];
let AltOrderSelect = [{ return 1; }];
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 45486fd..81d2fa3 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass;
def IIC_iLoad_d_i : InstrItinClass;
def IIC_iLoad_d_r : InstrItinClass;
def IIC_iLoad_d_ru : InstrItinClass;
-def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
-def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
-def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
-def IIC_iPop : InstrItinClass<0>; // micro-coded
-def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_m : InstrItinClass;
+def IIC_iLoad_mu : InstrItinClass;
+def IIC_iLoad_mBr : InstrItinClass;
+def IIC_iPop : InstrItinClass;
+def IIC_iPop_Br : InstrItinClass;
def IIC_iLoadiALU : InstrItinClass;
def IIC_iStore_i : InstrItinClass;
def IIC_iStore_r : InstrItinClass;
@@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass;
def IIC_iStore_d_i : InstrItinClass;
def IIC_iStore_d_r : InstrItinClass;
def IIC_iStore_d_ru : InstrItinClass;
-def IIC_iStore_m : InstrItinClass<0>; // micro-coded
-def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_iStore_m : InstrItinClass;
+def IIC_iStore_mu : InstrItinClass;
def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
@@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass;
def IIC_fpSQRT64 : InstrItinClass;
def IIC_fpLoad32 : InstrItinClass;
def IIC_fpLoad64 : InstrItinClass;
-def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
-def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_m : InstrItinClass;
+def IIC_fpLoad_mu : InstrItinClass;
def IIC_fpStore32 : InstrItinClass;
def IIC_fpStore64 : InstrItinClass;
-def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
-def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_m : InstrItinClass;
+def IIC_fpStore_mu : InstrItinClass;
def IIC_VLD1 : InstrItinClass;
def IIC_VLD1x2 : InstrItinClass;
def IIC_VLD1x3 : InstrItinClass;
@@ -258,8 +258,6 @@ def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
-def GenericItineraries : ProcessorItineraries<[], [], []>;
-
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8b1fb93..2c63825 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -151,28 +151,30 @@ def CortexA8Itineraries : ProcessorItineraries<
// Load multiple, def is the 5th operand. Pipeline 0 only.
// FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
//
// Load multiple + update, defs are the 1st and 5th operands.
InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
- InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
//
// Load multiple plus branch
InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
InstrStage<3, [A8_LSPipe]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
- [1, 2, 1, 1, 3]>,
+ [1, 2, 1, 1, 3], [], -1>, // dynamic uops
//
// Pop, def is the 3rd operand.
InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
- InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 3], [], -1>, // dynamic uops
//
// Push, def is the 3th operand.
InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
InstrStage<3, [A8_LSPipe]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
- [1, 1, 3]>,
-
+ [1, 1, 3], [], -1>, // dynamic uops
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -227,12 +229,13 @@ def CortexA8Itineraries : ProcessorItineraries<
// Store multiple. Pipeline 0 only.
// FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>]>,
+ InstrStage<2, [A8_LSPipe]>],
+ [], [], -1>, // dynamic uops
//
// Store multiple + update
InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>], [2]>,
-
+ InstrStage<2, [A8_LSPipe]>],
+ [2], [], -1>, // dynamic uops
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
@@ -393,14 +396,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 2], [], -1>, // dynamic uops
//
// FP Load Multiple + update
InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 2], [], -1>, // dynamic uops
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -419,15 +424,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Store Multiple + update
InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
-
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1], [], -1>, // dynamic uops
// NEON
// Issue through integer pipeline, and execute in NEON unit.
//
@@ -1051,3 +1057,19 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
+def CortexA8Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA8Itineraries;
+}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 0d710cc..7bc590f 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -11,6 +11,10 @@
//
//===----------------------------------------------------------------------===//
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
//
// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
// Reference Manual".
@@ -280,7 +284,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Load multiple + update, defs are the 1st and 5th operands.
InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -288,7 +293,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Load multiple plus branch
InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -297,7 +303,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_LSUnit]>,
InstrStage<1, [A9_Branch]>],
[1, 2, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Pop, def is the 3rd operand.
InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -305,7 +312,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 3],
- [NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Pop + branch, def is the 3rd operand.
InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -314,8 +322,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_LSUnit]>,
InstrStage<1, [A9_Branch]>],
[1, 1, 3],
- [NoBypass, NoBypass, A9_LdBypass]>,
-
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -409,14 +417,15 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
- InstrStage<2, [A9_LSUnit]>]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [], [], -1>, // dynamic uops
//
// Store multiple + update
InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
- InstrStage<2, [A9_LSUnit]>], [2]>,
-
+ InstrStage<2, [A9_LSUnit]>],
+ [2], [], -1>, // dynamic uops
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
@@ -713,7 +722,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Load Multiple + update
// FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -722,7 +732,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -749,7 +760,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Store Multiple + update
// FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -758,7 +770,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
// NEON
// VLD1
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -1861,3 +1874,22 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_NPipe]>],
[4, 1, 2, 2, 3, 3, 1]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
+def CortexA9Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 8; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA9Itineraries;
+}
+
+// TODO: Add Cortex-A9 processor and scheduler resources.
+
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index e2530d0..31d5d38 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -179,8 +179,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Args.push_back(Entry);
// Emit __eabi_memset call
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain,
+ TargetLowering::CallLoweringInfo CLI(Chain,
Type::getVoidTy(*DAG.getContext()), // return type
false, // return sign ext
false, // return zero ext
@@ -193,7 +192,9 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
false, // is return val used
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()), // callee
- Args, DAG, dl); // arg list, DAG and debug
+ Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(CLI);
return CallResult.second;
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e247b76..4762854 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -67,6 +67,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasDataBarrier(false)
, Pref32BitThumb(false)
, AvoidCPSRPartialUpdate(false)
+ , HasRAS(false)
, HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
@@ -82,7 +83,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = ArchFS + "," + FS;
@@ -96,13 +97,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
if (!HasV6T2Ops && hasThumb2())
HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- // After parsing Itineraries, set ItinData.IssueWidth.
- computeIssueWidth();
-
- if (TT.find("eabi") != std::string::npos)
+ if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass()))
// FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
// Darwin-EABI conforms to AACPS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
@@ -181,31 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- // If we have a reasonable estimate of the pipeline depth, then we can
- // estimate the penalty of a misprediction based on that.
- if (isCortexA8())
- return 13;
- else if (isCortexA9())
- return 8;
-
- // Otherwise, just return a sensible default.
- return 10;
-}
-
-void ARMSubtarget::computeIssueWidth() {
- unsigned allStage1Units = 0;
- for (const InstrItinerary *itin = InstrItins.Itineraries;
- itin->FirstStage != ~0U; ++itin) {
- const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
- allStage1Units |= IS->getUnits();
- }
- InstrItins.IssueWidth = 0;
- while (allStage1Units) {
- ++InstrItins.IssueWidth;
- // clear the lowest bit
- allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
- }
- assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+ return SchedModel->MispredictPenalty;
}
bool ARMSubtarget::enablePostRAScheduler(
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e72b06f..b394061 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,7 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
/// v6m, v7m for example.
bool IsMClass;
@@ -155,6 +155,9 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+ /// SchedModel - Processor specific instruction costs.
+ const MCSchedModel *SchedModel;
+
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 9aa8308..171c9ad 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -136,22 +136,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
- PM->add(createGlobalMergePass(TM->getTargetLowering()));
+ addPass(createGlobalMergePass(TM->getTargetLowering()));
return false;
}
bool ARMPassConfig::addInstSelector() {
- PM->add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+ addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
return false;
}
bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
- PM->add(createARMLoadStoreOptimizationPass(true));
+ addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
- PM->add(createMLxExpansionPass());
+ addPass(createMLxExpansionPass());
return true;
}
@@ -159,23 +159,23 @@ bool ARMPassConfig::addPreSched2() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None) {
if (!getARMSubtarget().isThumb1Only()) {
- PM->add(createARMLoadStoreOptimizationPass());
+ addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
if (getARMSubtarget().hasNEON())
- PM->add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
// proper scheduling.
- PM->add(createARMExpandPseudoPass());
+ addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
if (!getARMSubtarget().isThumb1Only())
- addPass(IfConverterID);
+ addPass(&IfConverterID);
}
if (getARMSubtarget().isThumb2())
- PM->add(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
return true;
}
@@ -183,13 +183,13 @@ bool ARMPassConfig::addPreSched2() {
bool ARMPassConfig::addPreEmitPass() {
if (getARMSubtarget().isThumb2()) {
if (!getARMSubtarget().prefers32BitThumb())
- PM->add(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
// Constant island pass work on unbundled instructions.
- addPass(UnpackMachineBundlesID);
+ addPass(&UnpackMachineBundlesID);
}
- PM->add(createARMConstantIslandPass());
+ addPass(createARMConstantIslandPass());
return true;
}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index a5ea1c2..3d85ca7 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -24,20 +24,11 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
+ bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
+ InitializeELF(isAAPCS_ABI);
if (isAAPCS_ABI) {
- StaticCtorSection =
- getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- StaticDtorSection =
- getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
LSDASection = NULL;
}
@@ -47,33 +38,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
-
-const MCSection *
-ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const {
- if (!isAAPCS_ABI)
- return TargetLoweringObjectFileELF::getStaticCtorSection(Priority);
-
- if (Priority == 65535)
- return StaticCtorSection;
-
- // Emit ctors in priority order.
- std::string Name = std::string(".init_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
-}
-
-const MCSection *
-ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const {
- if (!isAAPCS_ABI)
- return TargetLoweringObjectFileELF::getStaticDtorSection(Priority);
-
- if (Priority == 65535)
- return StaticDtorSection;
-
- // Emit dtors in priority order.
- std::string Name = std::string(".fini_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
-}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index ff21060..c6a7261 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -20,7 +20,6 @@ class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
- bool isAAPCS_ABI;
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
@@ -32,9 +31,6 @@ public:
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
-
- const MCSection * getStaticCtorSection(unsigned Priority) const;
- const MCSection * getStaticDtorSection(unsigned Priority) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2c53e3f..3a5957b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -236,7 +236,10 @@ public:
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
Match_RequiresNotITBlock,
Match_RequiresV6,
- Match_RequiresThumb2
+ Match_RequiresThumb2,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARMGenAsmMatcher.inc"
+
};
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
@@ -793,6 +796,13 @@ public:
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
+ bool isAdrLabel() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, but it can't fit
+ // into shift immediate encoding, we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
+ else return (isARMSOImm() || isARMSOImmNeg());
+ }
bool isARMSOImm() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -914,7 +924,9 @@ public:
// Immediate offset in range [-255, 255].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 256;
+ // The #-0 offset is encoded as INT32_MIN, and we have to check
+ // for this too.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
}
bool isAM3Offset() const {
if (Kind != k_Immediate && Kind != k_PostIndexRegister)
@@ -1028,7 +1040,8 @@ public:
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
- return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ // Special case, #-0 is INT32_MIN.
+ return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
}
bool isMemImm0_1020s4Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1446,8 +1459,10 @@ public:
assert(isRegShiftedImm() &&
"addRegShiftedImmOperands() on non RegShiftedImm!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
+ // Shift of #32 is encoded as 0 where permitted
+ unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm);
Inst.addOperand(MCOperand::CreateImm(
- ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, Imm)));
}
void addShifterImmOperands(MCInst &Inst, unsigned N) const {
@@ -1637,6 +1652,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(Imm));
}
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(isImm() && "Not an immediate!");
+
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (!isa<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ return;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -2301,7 +2332,7 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ccout " << getReg() << ">";
break;
case k_ITCondMask: {
- static const char *MaskStr[] = {
+ static const char *const MaskStr[] = {
"()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
"(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
};
@@ -2672,7 +2703,7 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned CC = StringSwitch<unsigned>(Tok.getString())
+ unsigned CC = StringSwitch<unsigned>(Tok.getString().lower())
.Case("eq", ARMCC::EQ)
.Case("ne", ARMCC::NE)
.Case("hs", ARMCC::HS)
@@ -2877,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(EndReg))
return Error(EndLoc, "invalid register in register list");
// Ranges must go from low to high.
- if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
return Error(EndLoc, "bad range in register list");
// Add all the registers in the range to the register list.
@@ -2904,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
Warning(RegLoc, "register list not in ascending order");
else
return Error(RegLoc, "register list not in ascending order");
}
- if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
Warning(RegLoc, "duplicated register (" + RegTok.getString() +
") in register list");
continue;
@@ -3249,28 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
- StringRef OptStr = Tok.getString();
-
- unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
- .Case("sy", ARM_MB::SY)
- .Case("st", ARM_MB::ST)
- .Case("sh", ARM_MB::ISH)
- .Case("ish", ARM_MB::ISH)
- .Case("shst", ARM_MB::ISHST)
- .Case("ishst", ARM_MB::ISHST)
- .Case("nsh", ARM_MB::NSH)
- .Case("un", ARM_MB::NSH)
- .Case("nshst", ARM_MB::NSHST)
- .Case("unst", ARM_MB::NSHST)
- .Case("osh", ARM_MB::OSH)
- .Case("oshst", ARM_MB::OSHST)
- .Default(~0U);
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
- if (Opt == ~0U)
- return MatchOperand_NoMatch;
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *MemBarrierID;
+ if (getParser().ParseExpression(MemBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_MB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
- Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
return MatchOperand_Success;
}
@@ -3280,7 +3342,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
StringRef IFlagsStr = Tok.getString();
// An iflags string of "none" is interpreted to mean that none of the AIF
@@ -3320,26 +3383,51 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// See ARMv6-M 10.1.1
std::string Name = Mask.lower();
unsigned FlagsVal = StringSwitch<unsigned>(Name)
- .Case("apsr", 0)
- .Case("iapsr", 1)
- .Case("eapsr", 2)
- .Case("xpsr", 3)
- .Case("ipsr", 5)
- .Case("epsr", 6)
- .Case("iepsr", 7)
- .Case("msp", 8)
- .Case("psp", 9)
- .Case("primask", 16)
- .Case("basepri", 17)
- .Case("basepri_max", 18)
- .Case("faultmask", 19)
- .Case("control", 20)
+ // Note: in the documentation:
+ // ARM deprecates using MSR APSR without a _<bits> qualifier as an alias
+ // for MSR APSR_nzcvq.
+ // but we do make it an alias here. This is so to get the "mask encoding"
+ // bits correct on MSR APSR writes.
+ //
+ // FIXME: Note the 0xc00 "mask encoding" bits version of the registers
+ // should really only be allowed when writing a special register. Note
+ // they get dropped in the MRS instruction reading a special register as
+ // the SYSm field is only 8 bits.
+ //
+ // FIXME: the _g and _nzcvqg versions are only allowed if the processor
+ // includes the DSP extension but that is not checked.
+ .Case("apsr", 0x800)
+ .Case("apsr_nzcvq", 0x800)
+ .Case("apsr_g", 0x400)
+ .Case("apsr_nzcvqg", 0xc00)
+ .Case("iapsr", 0x801)
+ .Case("iapsr_nzcvq", 0x801)
+ .Case("iapsr_g", 0x401)
+ .Case("iapsr_nzcvqg", 0xc01)
+ .Case("eapsr", 0x802)
+ .Case("eapsr_nzcvq", 0x802)
+ .Case("eapsr_g", 0x402)
+ .Case("eapsr_nzcvqg", 0xc02)
+ .Case("xpsr", 0x803)
+ .Case("xpsr_nzcvq", 0x803)
+ .Case("xpsr_g", 0x403)
+ .Case("xpsr_nzcvqg", 0xc03)
+ .Case("ipsr", 0x805)
+ .Case("epsr", 0x806)
+ .Case("iepsr", 0x807)
+ .Case("msp", 0x808)
+ .Case("psp", 0x809)
+ .Case("primask", 0x810)
+ .Case("basepri", 0x811)
+ .Case("basepri_max", 0x812)
+ .Case("faultmask", 0x813)
+ .Case("control", 0x814)
.Default(~0U);
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
- if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+ if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
@@ -5216,8 +5304,8 @@ validateInstruction(MCInst &Inst,
case ARM::LDRD_POST:
case ARM::LDREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
@@ -5225,8 +5313,8 @@ validateInstruction(MCInst &Inst,
}
case ARM::STRD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
@@ -5236,8 +5324,8 @@ validateInstruction(MCInst &Inst,
case ARM::STRD_POST:
case ARM::STREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
@@ -5315,6 +5403,16 @@ validateInstruction(MCInst &Inst,
"registers must be in range r0-r7");
break;
}
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need thumb2 (for the wide encoding), or we have an error.
+ if (!isThumbTwo() &&
+ Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ return Error(Operands[4]->getStartLoc(),
+ "source register must be the same as destination");
+ }
+ break;
+ }
}
return false;
@@ -6750,8 +6848,8 @@ processInstruction(MCInst &Inst,
case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
}
- unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
- if (Ammount == 32) Ammount = 0;
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
if (isNarrow)
@@ -6759,7 +6857,7 @@ processInstruction(MCInst &Inst,
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
if (newOpc != ARM::t2RRX)
- TmpInst.addOperand(MCOperand::CreateImm(Ammount));
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
if (!isNarrow)
@@ -6809,6 +6907,9 @@ processInstruction(MCInst &Inst,
// A shift by zero is a plain MOVr, not a MOVsi.
unsigned Amt = Inst.getOperand(2).getImm();
unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ // A shift by 32 should be encoded as 0 when permitted
+ if (Amt == 32 && (ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr))
+ Amt = 0;
unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
MCInst TmpInst;
TmpInst.setOpcode(Opc);
@@ -6985,6 +7086,16 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
return true;
}
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need to use the 32-bit encoding if it's available.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ Inst.setOpcode(ARM::t2ADDrr);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ return true;
+ }
+ break;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
@@ -7154,7 +7265,9 @@ processInstruction(MCInst &Inst,
}
case ARM::MOVsi: {
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
- if (SOpc == ARM_AM::rrx) return false;
+ // rrx shifts and asr/lsr of #32 is encoded as 0
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ return false;
if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
// Shifting by zero is accepted as a vanilla 'MOVr'
MCInst TmpInst;
@@ -7188,7 +7301,9 @@ processInstruction(MCInst &Inst,
case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
}
// If the shift is by zero, use the non-shifted instruction definition.
- if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) {
+ // The exception is for right shifts, where 0 == 32
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0 &&
+ !(SOpc == ARM_AM::lsr || SOpc == ARM_AM::asr)) {
MCInst TmpInst;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0));
@@ -7207,9 +7322,7 @@ processInstruction(MCInst &Inst,
// The mask bits for all but the first condition are represented as
// the low bit of the condition code value implies 't'. We currently
// always have 1 implies 't', so XOR toggle the bits if the low bit
- // of the condition code is zero. The encoding also expects the low
- // bit of the condition to be encoded as bit 4 of the mask operand,
- // so mask that in if needed
+ // of the condition code is zero.
MCOperand &MO = Inst.getOperand(1);
unsigned Mask = MO.getImm();
unsigned OrigMask = Mask;
@@ -7218,8 +7331,7 @@ processInstruction(MCInst &Inst,
assert(Mask && TZ <= 3 && "illegal IT mask value!");
for (unsigned i = 3; i != TZ; --i)
Mask ^= 1 << i;
- } else
- Mask |= 0x10;
+ }
MO.setImm(Mask);
// Set up the IT block state according to the IT instruction we just
@@ -7231,6 +7343,86 @@ processInstruction(MCInst &Inst,
ITState.FirstCond = true;
break;
}
+ case ARM::t2LSLrr:
+ case ARM::t2LSRrr:
+ case ARM::t2ASRrr:
+ case ARM::t2SBCrr:
+ case ARM::t2RORrr:
+ case ARM::t2BICrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
+ !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLrr: NewOpc = ARM::tLSLrr; break;
+ case ARM::t2LSRrr: NewOpc = ARM::tLSRrr; break;
+ case ARM::t2ASRrr: NewOpc = ARM::tASRrr; break;
+ case ARM::t2SBCrr: NewOpc = ARM::tSBC; break;
+ case ARM::t2RORrr: NewOpc = ARM::tROR; break;
+ case ARM::t2BICrr: NewOpc = ARM::tBIC; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::t2ANDrr:
+ case ARM::t2EORrr:
+ case ARM::t2ADCrr:
+ case ARM::t2ORRrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ // These instructions are special in that they are commutable, so shorter encodings
+ // are available more often.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
+ Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
+ !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2ADCrr: NewOpc = ARM::tADC; break;
+ case ARM::t2ANDrr: NewOpc = ARM::tAND; break;
+ case ARM::t2EORrr: NewOpc = ARM::tEOR; break;
+ case ARM::t2ORRrr: NewOpc = ARM::tORR; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ } else {
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ }
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
}
return false;
}
@@ -7277,6 +7469,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
+static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -7317,9 +7510,21 @@ MatchAndEmitInstruction(SMLoc IDLoc,
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
- case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
- return true;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing (Thumb vs. ARM, e.g.).
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
@@ -7336,7 +7541,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(IDLoc, "invalid instruction",
((ARMOperand*)Operands[0])->getLocRange());
case Match_ConversionFail:
- // The converter function will have already emited a diagnostic.
+ // The converter function will have already emitted a diagnostic.
return true;
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
@@ -7346,6 +7551,11 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_15: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ }
}
llvm_unreachable("Implement any new match types added!");
@@ -7582,5 +7792,6 @@ extern "C" void LLVMInitializeARMAsmParser() {
}
#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9a2aab5..ac916cc 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -49,6 +49,8 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
+add_dependencies(LLVMARMCodeGen intrinsics_gen)
+
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 912935d..e47bf66 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -24,12 +24,66 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <vector>
using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+ // Handles the condition code status of instructions in IT blocks
+ class ITStatus
+ {
+ public:
+ // Returns the condition code for instruction in IT block
+ unsigned getITCC() {
+ unsigned CC = ARMCC::AL;
+ if (instrInITBlock())
+ CC = ITStates.back();
+ return CC;
+ }
+
+ // Advances the IT block state to the next T or E
+ void advanceITState() {
+ ITStates.pop_back();
+ }
+
+ // Returns true if the current instruction is in an IT block
+ bool instrInITBlock() {
+ return !ITStates.empty();
+ }
+
+ // Returns true if current instruction is the last instruction in an IT block
+ bool instrLastInITBlock() {
+ return ITStates.size() == 1;
+ }
+
+ // Called when decoding an IT instruction. Sets the IT state for the following
+ // instructions that for the IT block. Firstcond and Mask correspond to the
+ // fields in the IT instruction encoding.
+ void setITState(char Firstcond, char Mask) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned CondBit0 = Firstcond & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ // push condition codes onto the stack the correct order for the pops
+ for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITStates.push_back(CCBits);
+ else
+ ITStates.push_back(CCBits ^ 1);
+ }
+ ITStates.push_back(CCBits);
+ }
+
+ private:
+ std::vector<unsigned char> ITStates;
+ };
+}
+
+namespace {
/// ARMDisassembler - ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -78,7 +132,7 @@ public:
/// getEDInfo - See MCDisassembler.
const EDInstInfo *getEDInfo() const;
private:
- mutable std::vector<unsigned> ITBlock;
+ mutable ITStatus ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
@@ -549,7 +603,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// These can often be values in a literal pool near the Address of the
/// instruction. The Address of the instruction and its immediate Value are
/// used as a possible literal pool entry. The SymbolLookUp call back will
-/// return the name of a symbol referenced by the the literal pool's entry if
+/// return the name of a symbol referenced by the literal pool's entry if
/// the referenced address is that of a symbol. Or it will return a pointer to
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
@@ -612,7 +666,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
case ARM::tSETEND:
// Some instructions (mostly conditional branches) are not
// allowed in IT blocks.
- if (!ITBlock.empty())
+ if (ITBlock.instrInITBlock())
S = SoftFail;
else
return Success;
@@ -623,7 +677,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
case ARM::t2TBH:
// Some instructions (mostly unconditional branches) can
// only appears at the end of, or outside of, an IT.
- if (ITBlock.size() > 1)
+ if (ITBlock.instrInITBlock() && !ITBlock.instrLastInITBlock())
S = SoftFail;
break;
default:
@@ -633,13 +687,11 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// If we're in an IT block, base the predicate on that. Otherwise,
// assume a predicate of AL.
unsigned CC;
- if (!ITBlock.empty()) {
- CC = ITBlock.back();
- if (CC == 0xF)
- CC = ARMCC::AL;
- ITBlock.pop_back();
- } else
+ CC = ITBlock.getITCC();
+ if (CC == 0xF)
CC = ARMCC::AL;
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
@@ -674,11 +726,9 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// context as a post-pass.
void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
unsigned CC;
- if (!ITBlock.empty()) {
- CC = ITBlock.back();
- ITBlock.pop_back();
- } else
- CC = ARMCC::AL;
+ CC = ITBlock.getITCC();
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
MCInst::iterator I = MI.begin();
@@ -726,7 +776,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
if (result) {
Size = 2;
- bool InITBlock = !ITBlock.empty();
+ bool InITBlock = ITBlock.instrInITBlock();
Check(result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
return result;
@@ -739,7 +789,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Nested IT blocks are UNPREDICTABLE. Must be checked before we add
// the Thumb predicate.
- if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty())
+ if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
result = MCDisassembler::SoftFail;
Check(result, AddThumbPredicate(MI));
@@ -749,21 +799,9 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// to the subsequent instructions.
if (MI.getOpcode() == ARM::t2IT) {
- // (3 - the number of trailing zeros) is the number of then / else.
- unsigned firstcond = MI.getOperand(0).getImm();
+ unsigned Firstcond = MI.getOperand(0).getImm();
unsigned Mask = MI.getOperand(1).getImm();
- unsigned CondBit0 = Mask >> 4 & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
- bool T = ((Mask >> Pos) & 1) == CondBit0;
- if (T)
- ITBlock.insert(ITBlock.begin(), firstcond);
- else
- ITBlock.insert(ITBlock.begin(), firstcond ^ 1);
- }
-
- ITBlock.push_back(firstcond);
+ ITBlock.setITState(Firstcond, Mask);
}
return result;
@@ -783,7 +821,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
- bool InITBlock = ITBlock.size();
+ bool InITBlock = ITBlock.instrInITBlock();
Check(result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
return result;
@@ -1186,8 +1224,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 4);
- unsigned regs = Val & 0xFF;
+ unsigned Vd = fieldFromInstruction32(Val, 8, 5);
+ unsigned regs = fieldFromInstruction32(Val, 0, 8);
if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1203,8 +1241,10 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 4);
- unsigned regs = (Val & 0xFF) / 2;
+ unsigned Vd = fieldFromInstruction32(Val, 8, 5);
+ unsigned regs = fieldFromInstruction32(Val, 0, 8);
+
+ regs = regs >> 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2976,7 +3016,7 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
return MCDisassembler::Success;
@@ -3111,9 +3151,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- int imm = Val & 0xFF;
- if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ if (Val == 0)
+ Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+ else {
+ int imm = Val & 0xFF;
+
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ }
return MCDisassembler::Success;
}
@@ -3258,9 +3303,9 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
} else if (Inst.getOpcode() == ARM::tADDspr) {
unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
@@ -3299,10 +3344,25 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
+ // Val is passed in as S:J1:J2:imm10H:imm10L:'0'
+ // Note only one trailing zero not two. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with two trailing zeros as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:'00', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
if (!tryAddingSymbolicOperand(Address,
- (Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
+ (Address & ~2u) + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(imm32));
return MCDisassembler::Success;
}
@@ -3408,35 +3468,39 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<9>(Val << 1)));
return MCDisassembler::Success;
}
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ // Val is passed in as S:J1:J2:imm10:imm11
+ // Note no trailing zero after imm11. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(imm32));
return MCDisassembler::Success;
}
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- switch (Val) {
- default:
+ if (Val & ~0xf)
return MCDisassembler::Fail;
- case 0xF: // SY
- case 0xE: // ST
- case 0xB: // ISH
- case 0xA: // ISHST
- case 0x7: // NSH
- case 0x6: // NSHST
- case 0x3: // OSH
- case 0x2: // OSHST
- break;
- }
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
@@ -4128,9 +4192,9 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4154,9 +4218,9 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4179,19 +4243,14 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction16(Insn, 4, 4);
- // The InstPrinter needs to have the low bit of the predicate in
- // the mask operand to be able to print it properly.
- unsigned mask = fieldFromInstruction16(Insn, 0, 5);
+ unsigned mask = fieldFromInstruction16(Insn, 0, 4);
if (pred == 0xF) {
pred = 0xE;
S = MCDisassembler::SoftFail;
}
- if ((mask & 0xF) == 0) {
- // Preserve the high bit of the mask, which is the low bit of
- // the predicate.
- mask &= 0x10;
+ if (mask == 0x0) {
mask |= 0x8;
S = MCDisassembler::SoftFail;
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index cbd81c1..8b9109e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -52,6 +52,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
unsigned Opcode = MI->getOpcode();
+ // Check for HINT instructions w/ canonical names.
+ if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ switch (MI->getOperand(0).getImm()) {
+ case 0: O << "\tnop"; break;
+ case 1: O << "\tyield"; break;
+ case 2: O << "\twfe"; break;
+ case 3: O << "\twfi"; break;
+ case 4: O << "\tsev"; break;
+ default:
+ // Anything else should just print normally.
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ printPredicateOperand(MI, 1, O);
+ if (Opcode == ARM::t2HINT)
+ O << ".w";
+ printAnnotation(O, Annot);
+ return;
+ }
+
// Check for MOVs and print canonical forms, instead.
if (Opcode == ARM::MOVsr) {
// FIXME: Thumb variants?
@@ -426,9 +447,13 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
return;
}
- if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ //If the op is sub we have to print the immediate even if it is 0
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
+
+ if (ImmOffs || (op == ARM_AM::sub))
O << ", #"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ARM_AM::getAddrOpcStr(op)
<< ImmOffs;
O << ']';
}
@@ -643,22 +668,50 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned Mask = Op.getImm() & 0xf;
if (getAvailableFeatures() & ARM::FeatureMClass) {
- switch (Op.getImm()) {
+ unsigned SYSm = Op.getImm();
+ unsigned Opcode = MI->getOpcode();
+ // For reads of the special registers ignore the "mask encoding" bits
+ // which are only for writes.
+ if (Opcode == ARM::t2MRS_M)
+ SYSm &= 0xff;
+ switch (SYSm) {
default: llvm_unreachable("Unexpected mask value!");
- case 0: O << "apsr"; return;
- case 1: O << "iapsr"; return;
- case 2: O << "eapsr"; return;
- case 3: O << "xpsr"; return;
- case 5: O << "ipsr"; return;
- case 6: O << "epsr"; return;
- case 7: O << "iepsr"; return;
- case 8: O << "msp"; return;
- case 9: O << "psp"; return;
- case 16: O << "primask"; return;
- case 17: O << "basepri"; return;
- case 18: O << "basepri_max"; return;
- case 19: O << "faultmask"; return;
- case 20: O << "control"; return;
+ case 0:
+ case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr
+ case 0x400: O << "apsr_g"; return;
+ case 0xc00: O << "apsr_nzcvqg"; return;
+ case 1:
+ case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr
+ case 0x401: O << "iapsr_g"; return;
+ case 0xc01: O << "iapsr_nzcvqg"; return;
+ case 2:
+ case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr
+ case 0x402: O << "eapsr_g"; return;
+ case 0xc02: O << "eapsr_nzcvqg"; return;
+ case 3:
+ case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
+ case 0x403: O << "xpsr_g"; return;
+ case 0xc03: O << "xpsr_nzcvqg"; return;
+ case 5:
+ case 0x805: O << "ipsr"; return;
+ case 6:
+ case 0x806: O << "epsr"; return;
+ case 7:
+ case 0x807: O << "iepsr"; return;
+ case 8:
+ case 0x808: O << "msp"; return;
+ case 9:
+ case 0x809: O << "psp"; return;
+ case 0x10:
+ case 0x810: O << "primask"; return;
+ case 0x11:
+ case 0x811: O << "basepri"; return;
+ case 0x12:
+ case 0x812: O << "basepri_max"; return;
+ case 0x13:
+ case 0x813: O << "faultmask"; return;
+ case 0x14:
+ case 0x814: O << "control"; return;
}
}
@@ -739,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
+
+ int32_t OffImm = (int32_t)MO.getImm();
+
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+}
+
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "#" << MI->getOperand(OpNum).getImm() * 4;
@@ -754,7 +826,8 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
+ unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = CountTrailingZeros_32(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
@@ -899,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "[" << getRegisterName(MO1.getReg());
- int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ int32_t OffImm = (int32_t)MO2.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm < 0)
- O << ", #-" << -OffImm * 4;
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm * 4;
+ O << ", #" << OffImm;
O << "]";
}
@@ -936,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm != 0) {
- O << ", ";
- if (OffImm < 0)
- O << "#-" << -OffImm * 4;
- else if (OffImm > 0)
- O << "#" << OffImm * 4;
- }
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 8acb7ee..73d7bfd 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -73,6 +73,7 @@ public:
void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index d10bfc1..ac6ce64 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -12,6 +12,7 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -84,7 +85,8 @@ public:
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
{ "fixup_arm_movt_hi16", 0, 20, 0 },
@@ -110,32 +112,7 @@ public:
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
- const MCSymbolRefExpr *A = Target.getSymA();
- // Some fixups to thumb function symbols need the low bit (thumb bit)
- // twiddled.
- if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
- if (A) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
- if (Asm.isThumbFunc(&Sym))
- Value |= 1;
- }
- }
- // We must always generate a relocation for BL/BLX instructions if we have
- // a symbol to reference, as the linker relies on knowing the destination
- // symbol's thumb-ness to get interworking right.
- if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
- IsResolved = false;
- }
+ bool &IsResolved);
bool mayNeedRelaxation(const MCInst &Inst) const;
@@ -269,7 +246,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext *Ctx = NULL) {
+ unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
@@ -322,7 +301,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
Value = -Value;
isAdd = false;
}
- assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 4096)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10,
@@ -345,8 +325,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
Value = -Value;
opc = 2; // 0b0010
}
- assert(ARM_AM::getSOImmVal(Value) != -1 &&
- "Out of range pc-relative fixup value!");
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
// Encode the immediate and shift the opcode into place.
return ARM_AM::getSOImmVal(Value) | (opc << 21);
}
@@ -414,39 +394,65 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
return swapped;
}
case ARM::fixup_arm_thumb_bl: {
- // The value doesn't encode the low bit (always zero) and is offset by
- // four. The value is encoded into disjoint bit positions in the destination
- // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
- //
- // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
- //
- // Note that the halfwords are stored high first, low second; so we need
- // to transpose the fixup value here to map properly.
- unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0;
- uint32_t Binary = 0;
- Value = 0x3fffff & ((Value - 4) >> 1);
- Binary = (Value & 0x7ff) << 16; // Low imm11 value.
- Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
- Binary |= isNeg << 10; // Sign bit.
- return Binary;
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 4) >> 1;
+ uint32_t signBit = (offset & 0x800000) >> 23;
+ uint32_t I1Bit = (offset & 0x400000) >> 22;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x200000) >> 21;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
+ uint32_t imm11Bits = (offset & 0x000007FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ (uint16_t)imm11Bits);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
+
}
case ARM::fixup_arm_thumb_blx: {
- // The value doesn't encode the low two bits (always zero) and is offset by
- // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
- // positions in the destination opcode. x = unchanged, I = immediate value
- // bit, S = sign extension bit, 0 = zero.
- //
- // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
- //
- // Note that the halfwords are stored high first, low second; so we need
- // to transpose the fixup value here to map properly.
- unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0;
- uint32_t Binary = 0;
- Value = 0xfffff & ((Value - 2) >> 2);
- Binary = (Value & 0x3ff) << 17; // Low imm10L value.
- Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
- Binary |= isNeg << 10; // Sign bit.
- return Binary;
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 2) >> 2;
+ uint32_t signBit = (offset & 0x400000) >> 22;
+ uint32_t I1Bit = (offset & 0x200000) >> 21;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x100000) >> 20;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
+ uint32_t imm10LBits = (offset & 0x3FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ ((uint16_t)imm10LBits) << 1);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
}
case ARM::fixup_arm_thumb_cp:
// Offset by 4, and don't encode the low two bits. Two bytes of that
@@ -473,7 +479,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
return Value | (isAdd << 23);
}
@@ -491,7 +498,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -507,6 +515,43 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
}
+void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+
+ // Try to get the encoded value for the fixup as-if we're mapping it into
+ // the instruction. This allows adjustFixupValue() to issue a diagnostic
+ // if the value aren't invalid.
+ (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
+}
+
namespace {
// FIXME: This should be in a separate file.
@@ -530,7 +575,7 @@ public:
void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup.getKind(), Value);
+ Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
@@ -615,7 +660,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup.getKind(), Value);
+ Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ae11be8..de48a0e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -120,14 +120,22 @@ namespace ARM_MB {
// The Memory Barrier Option constants map directly to the 4-bit encoding of
// the option field for memory barrier operations.
enum MemBOpt {
- SY = 15,
- ST = 14,
- ISH = 11,
- ISHST = 10,
- NSH = 7,
- NSHST = 6,
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ OSHST = 2,
OSH = 3,
- OSHST = 2
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ NSHST = 6,
+ NSH = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ ISHST = 10,
+ ISH = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ ST = 14,
+ SY = 15
};
inline static const char *MemBOptToString(unsigned val) {
@@ -135,92 +143,24 @@ namespace ARM_MB {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_0: return "#0x0";
}
}
} // namespace ARM_MB
-/// getARMRegisterNumbering - Given the enum value for some register, e.g.
-/// ARM::LR, return the number that it corresponds to (e.g. 14).
-inline static unsigned getARMRegisterNumbering(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- default:
- llvm_unreachable("Unknown ARM register!");
- case R0: case S0: case D0: case Q0: return 0;
- case R1: case S1: case D1: case Q1: return 1;
- case R2: case S2: case D2: case Q2: return 2;
- case R3: case S3: case D3: case Q3: return 3;
- case R4: case S4: case D4: case Q4: return 4;
- case R5: case S5: case D5: case Q5: return 5;
- case R6: case S6: case D6: case Q6: return 6;
- case R7: case S7: case D7: case Q7: return 7;
- case R8: case S8: case D8: case Q8: return 8;
- case R9: case S9: case D9: case Q9: return 9;
- case R10: case S10: case D10: case Q10: return 10;
- case R11: case S11: case D11: case Q11: return 11;
- case R12: case S12: case D12: case Q12: return 12;
- case SP: case S13: case D13: case Q13: return 13;
- case LR: case S14: case D14: case Q14: return 14;
- case PC: case S15: case D15: case Q15: return 15;
-
- case S16: case D16: return 16;
- case S17: case D17: return 17;
- case S18: case D18: return 18;
- case S19: case D19: return 19;
- case S20: case D20: return 20;
- case S21: case D21: return 21;
- case S22: case D22: return 22;
- case S23: case D23: return 23;
- case S24: case D24: return 24;
- case S25: case D25: return 25;
- case S26: case D26: return 26;
- case S27: case D27: return 27;
- case S28: case D28: return 28;
- case S29: case D29: return 29;
- case S30: case D30: return 30;
- case S31: case D31: return 31;
-
- // Composite registers use the regnum of the first register in the list.
- /* Q0 */ case D0_D2: return 0;
- case D1_D2: case D1_D3: return 1;
- /* Q1 */ case D2_D4: return 2;
- case D3_D4: case D3_D5: return 3;
- /* Q2 */ case D4_D6: return 4;
- case D5_D6: case D5_D7: return 5;
- /* Q3 */ case D6_D8: return 6;
- case D7_D8: case D7_D9: return 7;
- /* Q4 */ case D8_D10: return 8;
- case D9_D10: case D9_D11: return 9;
- /* Q5 */ case D10_D12: return 10;
- case D11_D12: case D11_D13: return 11;
- /* Q6 */ case D12_D14: return 12;
- case D13_D14: case D13_D15: return 13;
- /* Q7 */ case D14_D16: return 14;
- case D15_D16: case D15_D17: return 15;
- /* Q8 */ case D16_D18: return 16;
- case D17_D18: case D17_D19: return 17;
- /* Q9 */ case D18_D20: return 18;
- case D19_D20: case D19_D21: return 19;
- /* Q10 */ case D20_D22: return 20;
- case D21_D22: case D21_D23: return 21;
- /* Q11 */ case D22_D24: return 22;
- case D23_D24: case D23_D25: return 23;
- /* Q12 */ case D24_D26: return 24;
- case D25_D26: case D25_D27: return 25;
- /* Q13 */ case D26_D28: return 26;
- case D27_D28: case D27_D29: return 27;
- /* Q14 */ case D28_D30: return 28;
- case D29_D30: case D29_D31: return 29;
- /* Q15 */
- }
-}
-
/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
///
static inline bool isARMLowRegister(unsigned Reg) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index aa649ba..7d6acbc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -178,9 +178,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
break;
}
break;
- case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_blx:
- case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
switch (Modifier) {
case MCSymbolRefExpr::VK_ARM_PLT:
Type = ELF::R_ARM_PLT32;
@@ -192,6 +191,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
break;
case ARM::fixup_arm_condbl:
case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
@@ -252,10 +252,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_thumb_cp:
case ARM::fixup_arm_thumb_br:
llvm_unreachable("Unimplemented");
- case ARM::fixup_arm_uncondbranch:
- Type = ELF::R_ARM_CALL;
- break;
case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 03e8d5f..d32805e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -22,40 +22,14 @@ EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
cl::init(false));
-static const char *const arm_asm_table[] = {
- "{r0}", "r0",
- "{r1}", "r1",
- "{r2}", "r2",
- "{r3}", "r3",
- "{r4}", "r4",
- "{r5}", "r5",
- "{r6}", "r6",
- "{r7}", "r7",
- "{r8}", "r8",
- "{r9}", "r9",
- "{r10}", "r10",
- "{r11}", "r11",
- "{r12}", "r12",
- "{r13}", "r13",
- "{r14}", "r14",
- "{lr}", "lr",
- "{sp}", "sp",
- "{ip}", "ip",
- "{fp}", "fp",
- "{sl}", "sl",
- "{memory}", "memory",
- "{cc}", "cc",
- 0,0
-};
-
void ARMMCAsmInfoDarwin::anchor() { }
ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
- AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
CommentString = "@";
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
+ UseDataRegionDirectives = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 10d1c48..94f1082 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -18,6 +18,7 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter {
void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : MCII(mcii), STI(sti) {
+ : MCII(mcii), STI(sti), CTX(ctx) {
}
~ARMMCCodeEmitter() {}
@@ -336,6 +338,7 @@ public:
} // end anonymous namespace
MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, STI, Ctx);
@@ -404,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
// Q registers are encoded as 2x their register number.
switch (Reg) {
@@ -433,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- Reg = getARMRegisterNumbering(MO.getReg());
+ Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
int32_t SImm = MO1.getImm();
bool isAdd = true;
@@ -640,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
-/// target.
+/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate
+/// ADR label target.
uint32_t ARMMCCodeEmitter::
getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -651,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
Fixups);
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
- if (offset < 0) {
+
+ if (offset == INT32_MIN) {
+ Val = 0x1000;
+ offset = 0;
+ } else if (offset < 0) {
Val = 0x1000;
offset *= -1;
}
- Val |= offset;
+
+ int SoImmVal = ARM_AM::getSOImmVal(offset);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ Val |= SoImmVal;
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -669,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
Fixups);
int32_t Val = MO.getImm();
- if (Val < 0) {
+ if (Val == INT32_MIN)
+ Val = 0x1000;
+ else if (Val < 0) {
Val *= -1;
Val |= 0x1000;
}
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -698,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO1 = MI.getOperand(OpIdx);
const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO1.getReg());
- unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
return (Rm << 3) | Rn;
}
@@ -715,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm12 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -795,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -831,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// {7-0} = imm8
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm8 = MO1.getImm();
return (Reg << 8) | Imm8;
}
@@ -861,11 +874,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
// Handle :upper16: and :lower16: assembly prefixes.
const MCExpr *E = MO.getExpr();
+ MCFixupKind Kind;
if (E->getKind() == MCExpr::Target) {
const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
E = ARM16Expr->getSubExpr();
- MCFixupKind Kind;
switch (ARM16Expr->getKind()) {
default: llvm_unreachable("Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
@@ -891,9 +904,21 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
}
Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
- };
-
- llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ }
+ // If the expression doesn't have :upper16: or :lower16: on it,
+ // it's just a plain immediate expression, and those evaluate to
+ // the lower 16 bits of the expression regardless of whether
+ // we have a movt or a movw.
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ return 0;
}
uint32_t ARMMCCodeEmitter::
@@ -902,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
- unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
@@ -933,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
// {12} isAdd
// {11-0} imm12/Rm
const MCOperand &MO = MI.getOperand(OpIdx);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
Binary |= Rn << 14;
return Binary;
@@ -956,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
Binary <<= 7; // Shift amount is bits [11:7]
Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
- Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0]
}
return Binary | (isAdd << 12) | (isReg << 13);
}
@@ -969,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
bool isAdd = MO1.getImm() != 0;
- return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4);
}
uint32_t ARMMCCodeEmitter::
@@ -987,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
return Imm8 | (isAdd << 8) | (isImm << 9);
}
@@ -1005,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
if (!MO.isReg()) {
- unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
@@ -1015,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
++MCNumCPRelocations;
return (Rn << 9) | (1 << 13);
}
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
bool isImm = MO1.getReg() == 0;
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO1.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
}
@@ -1050,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm5 = MO1.getImm();
return ((Imm5 & 0x1f) << 3) | Rn;
}
@@ -1077,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false; // 'U' bit is handled as part of the fixup.
@@ -1123,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1148,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the shift operation Rs.
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
unsigned ARMMCCodeEmitter::
@@ -1167,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1192,8 +1217,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
// Encode shift_imm bit[11:7].
Binary |= SBits << 4;
unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
- assert(Offset && "Offset must be in range 1-32!");
- if (Offset == 32) Offset = 0;
+ assert(Offset < 32 && "Offset must be in range 0-31!");
return Binary | (Offset << 7);
}
@@ -1207,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
// Encoded as [Rn, Rm, imm].
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
Value <<= 4;
- Value |= getARMRegisterNumbering(MO2.getReg());
+ Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
Value <<= 2;
Value |= MO3.getImm();
@@ -1223,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
const MCOperand &MO2 = MI.getOperand(OpNum+1);
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
// Even though the immediate is 8 bits long, we need 9 bits in order
// to represent the (inverse of the) sign bit.
@@ -1285,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1341,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
if (SPRRegs || DPRRegs) {
// VLDM/VSTM
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
Binary |= (RegNo & 0x1f) << 8;
if (SPRRegs)
@@ -1350,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
Binary |= NumRegs * 2;
} else {
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
@@ -1366,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1389,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1415,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1434,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return getARMRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index e3512cd..5df84c8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -35,7 +35,7 @@
using namespace llvm;
-std::string ARM_MC::ParseARMTriple(StringRef TT) {
+std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
unsigned Len = TT.size();
@@ -51,27 +51,48 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
Idx = 6;
}
+ bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
if (Idx) {
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ if (NoCPU)
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
} else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
- } else
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ if (NoCPU)
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ } else {
+ // v7 CPUs have lots of different feature sets. If no CPU is specified,
+ // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
+ // the "minimum" feature set and use CPU string to figure out the exact
+ // features.
+ if (NoCPU)
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ }
} else if (SubVer == '6') {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
ARMArchFeature = "+v6t2";
- else if (Len >= Idx+2 && TT[Idx+1] == 'm')
- // v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6t2,+noarm,+mclass";
- else
+ else if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ if (NoCPU)
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6,+noarm,+mclass";
+ else
+ ARMArchFeature = "+v6";
+ } else
ARMArchFeature = "+v6";
} else if (SubVer == '5') {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
@@ -94,7 +115,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
- std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = ArchFS + "," + FS.str();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 88472d7..510302d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
@@ -31,7 +32,7 @@ class raw_ostream;
extern Target TheARMTarget, TheThumbTarget;
namespace ARM_MC {
- std::string ParseARMTriple(StringRef TT);
+ std::string ParseARMTriple(StringRef TT, StringRef CPU);
/// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
/// This is exposed so Asm parser, etc. do not need to go through
@@ -41,6 +42,7 @@ namespace ARM_MC {
}
MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 8057cb6..a51e0fa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -190,7 +190,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// 0 - arm instructions
// 1 - thumb instructions
// the other half of the relocated expression is in the following pair
- // relocation entry in the the low 16 bits of r_address field.
+ // relocation entry in the low 16 bits of r_address field.
unsigned ThumbBit = 0;
unsigned MovtBit = 0;
switch ((unsigned)Fixup.getKind()) {
@@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
if (Type == macho::RIT_ARM_Half) {
- // The other-half value only gets populated for the movt relocation.
+ // The other-half value only gets populated for the movt and movw
+ // relocation entries.
uint32_t Value = 0;;
switch ((unsigned)Fixup.getKind()) {
default: break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Value = (FixedValue >> 16) & 0xffff;
+ break;
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- Value = FixedValue;
+ Value = FixedValue & 0xffff;
break;
}
macho::RelocationEntry MREPair;
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2899836..ad60e32 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -220,7 +220,9 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID1 = TII->get(MulOpc);
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
- unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TmpReg = MRI->createVirtualRegister(
+ TII->getRegClass(MCID1, 0, TRI, MF));
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 3eddda8..57dc6cb 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -710,3 +710,24 @@ targets, e.g., PPC, that share this behavior, it would be best to implement
this in a target-independent way: we should probably fold that (when using
"undefined at zero" semantics) to set the "defined at zero" bit and have
the code generator expand out the right code.
+
+
+//===---------------------------------------------------------------------===//
+
+Clean up the test/MC/ARM files to have more robust register choices.
+
+R0 should not be used as a register operand in the assembler tests as it's then
+not possible to distinguish between a correct encoding and a missing operand
+encoding, as zero is the default value for the binary encoder.
+e.g.,
+ add r0, r0 // bad
+ add r3, r5 // good
+
+Register operands should be distinct. That is, when the encoding does not
+require two syntactical operands to refer to the same register, two different
+registers should be used in the test so as to catch errors where the
+operands are swapped in the encoding.
+e.g.,
+ subs.w r1, r1, r1 // bad
+ subs.w r1, r2, r3 // good
+
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index e03e758..735b255 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -53,11 +53,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert((RC == ARM::tGPRRegisterClass ||
+ assert((RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) && "Unknown regclass!");
- if (RC == ARM::tGPRRegisterClass ||
+ if (RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) {
DebugLoc DL;
@@ -81,11 +81,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert((RC == ARM::tGPRRegisterClass ||
+ assert((RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) && "Unknown regclass!");
- if (RC == ARM::tGPRRegisterClass ||
+ if (RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) {
DebugLoc DL;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index ef77bbd..a39b722 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -49,13 +49,14 @@ const TargetRegisterClass*
Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
const {
if (ARM::tGPRRegClass.hasSubClassEq(RC))
- return ARM::tGPRRegisterClass;
+ return &ARM::tGPRRegClass;
return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
}
const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
- return ARM::tGPRRegisterClass;
+Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::tGPRRegClass;
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -109,7 +110,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
unsigned LdReg = DestReg;
if (DestReg == ARM::SP) {
assert(BaseReg == ARM::SP && "Unexpected!");
- LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
}
if (NumBytes <= 255 && NumBytes >= 0)
@@ -693,7 +694,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
} else if (MI.mayStore()) {
- VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
if (Opcode == ARM::tSTRspi) {
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 6971842..f2e4b08 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -30,7 +30,8 @@ public:
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index ecb4c2f..d54aa93 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -24,8 +24,6 @@ STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
class Thumb2ITBlockPass : public MachineFunctionPass {
- bool PreRegAlloc;
-
public:
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
@@ -76,16 +74,14 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
Uses.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
Uses.insert(*Subreg);
}
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
Defs.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
continue;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 8ab486b..e9e20dd 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -126,9 +126,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
- RC == ARM::GPRnopcRegisterClass) {
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -153,9 +153,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
- RC == ARM::GPRnopcRegisterClass) {
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
-/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
-/// two-addrss instruction inserted by two-address pass.
-void
-Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
- MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const {
- if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
- return;
-
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
- if (CC == ARMCC::AL || PredReg != ARM::CPSR)
- return;
-
- // Schedule the copy so it doesn't come between previous instructions
- // and UseMI which can form an IT block.
- unsigned SrcReg = SrcMI->getOperand(1).getReg();
- ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
- MachineBasicBlock *MBB = UseMI->getParent();
- MachineBasicBlock::iterator MBBI = SrcMI;
- unsigned NumInsts = 0;
- while (--MBBI != MBB->begin()) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
- if (!(NCC == CC || NCC == OCC) ||
- NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->modifiesRegister(ARM::CPSR, &TRI))
- break;
- if (++NumInsts == 4)
- // Too many in a row!
- return;
- }
-
- if (NumInsts) {
- MBB->remove(SrcMI);
- MBB->insert(++MBBI, SrcMI);
- }
-}
-
ARMCC::CondCodes
llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 0911f8a..2cdcd06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -57,11 +57,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
- /// two-addrss instruction inserted by two-address pass.
- void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index b5a397e..f18f491 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -67,6 +67,7 @@ namespace {
{ ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
//{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 },
{ ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
{ ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
{ ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
OpenPOWER on IntegriCloud