path: root/lib/Target/ARM
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.h | 1
-rw-r--r--  lib/Target/ARM/ARM.td | 31
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 98
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 40
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 943
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 16
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 173
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 13
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 2
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 5
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.cpp | 78
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.h | 59
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 16
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 233
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 41
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 174
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 679
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 19
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 21
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 62
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 212
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 125
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 6
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 138
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 12
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 23
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 2
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 597
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td | 1085
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 14
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 19
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 35
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 280
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 92
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 584
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 15
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 7
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 5
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 8
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 51
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 74
54 files changed, 4546 insertions, 1609 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 2a1e8e4..1446bbb 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -37,6 +37,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 69e2346..23974ad 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict VFP3 to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
@@ -139,6 +142,18 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors",
+ [FeatureNEONForFP, FeatureT2XtPk,
+ FeatureVFP4, FeatureMP, FeatureHWDiv,
+ FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureHasSlowFPVMLx]>;
+
+// FIXME: It has not been determined if A15 has these features.
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors",
+ [FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -214,6 +229,10 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
FeatureHasRAS]>;
+// FIXME: A15 currently has the same ProcessorModel as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model,
+ [ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
@@ -227,6 +246,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
+// Swift uArch Processors.
+def : ProcessorModel<"swift", SwiftModel,
+ [ProcSwift, HasV7Ops, FeatureNEON,
+ FeatureDB, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index e9e2803..d439d1d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,6 +23,8 @@
#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
#include "llvm/Module.h"
@@ -40,9 +42,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -302,7 +303,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
}
void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -389,16 +390,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
//===--------------------------------------------------------------------===//
MCSymbol *ARMAsmPrinter::
-GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
- const MachineBasicBlock *MBB) const {
- SmallString<60> Name;
- raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
- << getFunctionNumber() << '_' << uid << '_' << uid2
- << "_set_" << MBB->getNumber();
- return OutContext.GetOrCreateSymbol(Name.str());
-}
-
-MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
@@ -592,9 +583,24 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(
getObjFileLowering());
- OutStreamer.SwitchSection(TLOFMacho.getTextSection());
- OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
- OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+
+ // Collect the set of sections our functions will go into.
+ SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
+ SmallPtrSet<const MCSection *, 8> > TextSections;
+ // Default text section comes first.
+ TextSections.insert(TLOFMacho.getTextSection());
+ // Now any user defined text sections from function attributes.
+ for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
+ TextSections.insert(TLOFMacho.SectionForGlobal(F, Mang, TM));
+ // Now the coalescable sections.
+ TextSections.insert(TLOFMacho.getTextCoalSection());
+ TextSections.insert(TLOFMacho.getConstTextCoalSection());
+
+ // Emit the sections in the .s file header to fix the order.
+ for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
+ OutStreamer.SwitchSection(TextSections[i]);
+
if (RelocM == Reloc::DynamicNoPIC) {
const MCSection *sect =
OutContext.getMachOSection("__TEXT", "__symbol_stub4",
@@ -743,13 +749,28 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
ARMBuildAttrs::Allowed);
} else if (CPUString == "generic") {
- // FIXME: Why these defaults?
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ // For a generic CPU, we assume a standard v7a architecture in Subtarget.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- }
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV7Ops()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV6T2Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ else if (Subtarget->hasV6Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ else if (Subtarget->hasV5TEOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ else if (Subtarget->hasV5TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ else if (Subtarget->hasV4TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
@@ -893,7 +914,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+ int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -1091,16 +1112,6 @@ static void populateADROperands(MCInst &Inst, unsigned Dest,
Inst.addOperand(MCOperand::CreateReg(ccreg));
}
-void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
- unsigned Opcode) {
- MCInst TmpInst;
-
- // Emit the instruction as usual, just patch the opcode.
- LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- TmpInst.setOpcode(Opcode);
- OutStreamer.EmitInstruction(TmpInst);
-}
-
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
@@ -1402,31 +1413,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::t2BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2B);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- return;
- }
case ARM::MOVi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel: {
MCInst TmpInst;
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 3555e8f5..c875b2c 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -53,7 +53,7 @@ public:
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
- virtual const char *getPassName() const {
+ virtual const char *getPassName() const LLVM_OVERRIDE {
return "ARM Assembly Printer";
}
@@ -62,22 +62,24 @@ public:
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) LLVM_OVERRIDE;
virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O);
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) LLVM_OVERRIDE;
void EmitJumpTable(const MachineInstr *MI);
void EmitJump2Table(const MachineInstr *MI);
- virtual void EmitInstruction(const MachineInstr *MI);
- bool runOnMachineFunction(MachineFunction &F);
+ virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
+ virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE;
- virtual void EmitConstantPool() {} // we emit constant pools customly!
- virtual void EmitFunctionBodyEnd();
- virtual void EmitFunctionEntryLabel();
- void EmitStartOfAsmFile(Module &M);
- void EmitEndOfAsmFile(Module &M);
- void EmitXXStructor(const Constant *CV);
+ virtual void EmitConstantPool() LLVM_OVERRIDE {
+ // We emit constant pools ourselves!
+ }
+ virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE;
+ virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE;
+ virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE;
+ virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
+ virtual void EmitXXStructor(const Constant *CV) LLVM_OVERRIDE;
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -101,12 +103,13 @@ private:
public:
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ virtual MachineLocation
+ getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
/// EmitDwarfRegOp - Emit dwarf register operation.
- virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE;
- virtual unsigned getISAEncoding() {
+ virtual unsigned getISAEncoding() LLVM_OVERRIDE {
// ARM/Darwin adds ISA to the DWARF info for each function.
if (!Subtarget->isTargetDarwin())
return 0;
@@ -114,18 +117,19 @@ public:
ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
+private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
- MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
- const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
MCSymbol *GetARMSJLJEHLabel(void) const;
MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+public:
/// EmitMachineConstantPoolValue - Print a machine constantpool value to
/// the .s file.
- virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+ virtual void
+ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) LLVM_OVERRIDE;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1cc5a17..3c7bb24 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -49,6 +49,11 @@ static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+ cl::Hidden, cl::init(12),
+ cl::desc("Clearance before partial register updates"));
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
@@ -683,7 +688,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Handle register classes that require multiple instructions.
unsigned BeginIdx = 0;
unsigned SubRegs = 0;
- unsigned Spacing = 1;
+ int Spacing = 1;
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
@@ -697,6 +702,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+ else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
@@ -705,27 +712,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
- if (Opc) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineInstrBuilder Mov;
- for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
- assert(Dst && Src && "Bad sub-register");
- Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src));
- // VORR takes two source operands.
- if (Opc == ARM::VORRq)
- Mov.addReg(Src);
- }
- // Add implicit super-register defs and kills to the last instruction.
- Mov->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- Mov->addRegisterKilled(SrcReg, TRI);
- return;
- }
+ assert(Opc && "Impossible reg-to-reg copy");
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstrBuilder Mov;
- llvm_unreachable("Impossible reg-to-reg copy");
+ // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+ BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ Spacing = -Spacing;
+ }
+#ifndef NDEBUG
+ SmallSet<unsigned, 4> DstRegs;
+#endif
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+#ifndef NDEBUG
+ assert(!DstRegs.count(Src) && "destructive vector copy");
+ DstRegs.insert(Dst);
+#endif
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src);
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
+ Mov = AddDefaultPred(Mov);
+ }
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
}
static const
@@ -775,6 +793,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -922,6 +947,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@@ -947,6 +973,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1378,7 +1413,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
- case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRSHi12:
@@ -1517,6 +1551,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // engine do its thing.
+ return Subtarget.isSwift();
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -1569,71 +1611,41 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
/// Identify instructions that can be folded into a MOVCC instruction, and
-/// return the corresponding opcode for the predicated pseudo-instruction.
-static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
- const MachineRegisterInfo &MRI) {
+/// return the defining instruction.
+static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return 0;
if (!MRI.hasOneNonDBGUse(Reg))
return 0;
- MI = MRI.getVRegDef(Reg);
+ MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return 0;
+ // MI is folded into the MOVCC by predicating it.
+ if (!MI->isPredicable())
+ return 0;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return 0;
if (!MO.isReg())
continue;
+ // MI can't have any tied operands, that would conflict with predication.
+ if (MO.isTied())
+ return 0;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
return 0;
if (MO.isDef() && !MO.isDead())
return 0;
}
- switch (MI->getOpcode()) {
- default: return 0;
- case ARM::ANDri: return ARM::ANDCCri;
- case ARM::ANDrr: return ARM::ANDCCrr;
- case ARM::ANDrsi: return ARM::ANDCCrsi;
- case ARM::ANDrsr: return ARM::ANDCCrsr;
- case ARM::t2ANDri: return ARM::t2ANDCCri;
- case ARM::t2ANDrr: return ARM::t2ANDCCrr;
- case ARM::t2ANDrs: return ARM::t2ANDCCrs;
- case ARM::EORri: return ARM::EORCCri;
- case ARM::EORrr: return ARM::EORCCrr;
- case ARM::EORrsi: return ARM::EORCCrsi;
- case ARM::EORrsr: return ARM::EORCCrsr;
- case ARM::t2EORri: return ARM::t2EORCCri;
- case ARM::t2EORrr: return ARM::t2EORCCrr;
- case ARM::t2EORrs: return ARM::t2EORCCrs;
- case ARM::ORRri: return ARM::ORRCCri;
- case ARM::ORRrr: return ARM::ORRCCrr;
- case ARM::ORRrsi: return ARM::ORRCCrsi;
- case ARM::ORRrsr: return ARM::ORRCCrsr;
- case ARM::t2ORRri: return ARM::t2ORRCCri;
- case ARM::t2ORRrr: return ARM::t2ORRCCrr;
- case ARM::t2ORRrs: return ARM::t2ORRCCrs;
-
- // ARM ADD/SUB
- case ARM::ADDri: return ARM::ADDCCri;
- case ARM::ADDrr: return ARM::ADDCCrr;
- case ARM::ADDrsi: return ARM::ADDCCrsi;
- case ARM::ADDrsr: return ARM::ADDCCrsr;
- case ARM::SUBri: return ARM::SUBCCri;
- case ARM::SUBrr: return ARM::SUBCCrr;
- case ARM::SUBrsi: return ARM::SUBCCrsi;
- case ARM::SUBrsr: return ARM::SUBCCrsr;
-
- // Thumb2 ADD/SUB
- case ARM::t2ADDri: return ARM::t2ADDCCri;
- case ARM::t2ADDri12: return ARM::t2ADDCCri12;
- case ARM::t2ADDrr: return ARM::t2ADDCCrr;
- case ARM::t2ADDrs: return ARM::t2ADDCCrs;
- case ARM::t2SUBri: return ARM::t2SUBCCri;
- case ARM::t2SUBri12: return ARM::t2SUBCCri12;
- case ARM::t2SUBrr: return ARM::t2SUBCCrr;
- case ARM::t2SUBrs: return ARM::t2SUBCCrs;
- }
+ bool DontMoveAcrossStores = true;
+ if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
+ return 0;
+ return MI;
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
@@ -1662,19 +1674,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- MachineInstr *DefMI = 0;
- unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
- bool Invert = !Opc;
- if (!Opc)
- Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
- if (!Opc)
+ MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
+ bool Invert = !DefMI;
+ if (!DefMI)
+ DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
+ if (!DefMI)
return 0;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- get(Opc), MI->getOperand(0).getReg())
- .addOperand(MI->getOperand(Invert ? 2 : 1));
+ DefMI->getDesc(),
+ MI->getOperand(0).getReg());
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1693,6 +1704,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (NewMI->hasOptionalDef())
AddDefaultCC(NewMI);
+ // The output register value when the predicate is false is an implicit
+ // register operand tied to the first def.
+ // The tie makes the register allocator ensure the FalseReg is allocated the
+ // same register as operand 0.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ FalseReg.setImplicit();
+ NewMI->addOperand(FalseReg);
+ NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
@@ -2039,13 +2059,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
- if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
MI = 0;
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
UE = MRI->use_end(); UI != UE; ++UI) {
if (UI->getParent() != CmpInstr->getParent()) continue;
MachineInstr *PotentialAND = &*UI;
- if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
+ isPredicated(PotentialAND))
continue;
MI = PotentialAND;
break;
@@ -2111,6 +2132,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// The single candidate is called MI.
if (!MI) MI = Sub;
+ // We can't use a predicated instruction - it doesn't always write the flags.
+ if (isPredicated(MI))
+ return false;
+
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
@@ -2217,6 +2242,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
+ assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
CmpInstr->eraseFromParent();
// Modify the condition code of operands in OperandsToUpdate.
@@ -2347,6 +2373,260 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
return true;
}
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = MI->getDesc();
+ int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+ assert(UOps >= 0 && "bad # UOps");
+ return UOps;
+ }
+
+ case ARM::LDRrs:
+ case ARM::LDRBrs:
+ case ARM::STRrs:
+ case ARM::STRBrs: {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRH:
+ case ARM::STRH: {
+ if (!MI->getOperand(2).getReg())
+ return 1;
+
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+
+ case ARM::LDRSB_POST:
+ case ARM::LDRSH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 4 : 3;
+ }
+
+ case ARM::LDR_PRE_REG:
+ case ARM::LDRB_PRE_REG: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rt == Rm)
+ return 3;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::STR_PRE_REG:
+ case ARM::STRB_PRE_REG: {
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::LDRH_PRE:
+ case ARM::STRH_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (!Rm)
+ return 2;
+ if (Rt == Rm)
+ return 3;
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+ ? 3 : 2;
+ }
+
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 3 : 2;
+ }
+
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_IMM:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRH_POST:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_PRE_IMM:
+ return 2;
+
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSH_PRE: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm == 0)
+ return 3;
+ unsigned Rt = MI->getOperand(0).getReg();
+ if (Rt == Rm)
+ return 4;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 3;
+ return 4;
+ }
+
+ case ARM::LDRD: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::STRD: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return 2;
+ }
+
+ case ARM::LDRD_POST:
+ case ARM::t2LDRD_POST:
+ return 3;
+
+ case ARM::STRD_POST:
+ case ARM::t2STRD_POST:
+ return 4;
+
+ case ARM::LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::t2LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::STRD_PRE: {
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return 3;
+ }
+
+ case ARM::t2STRD_PRE:
+ return 3;
+
+ case ARM::t2LDR_POST:
+ case ARM::t2LDRB_POST:
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRSBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBpci:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRH_POST:
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSB_POST:
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSH_POST:
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHpci:
+ case ARM::t2LDRSHs:
+ return 2;
+
+ case ARM::t2LDRDi8: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::t2STRB_POST:
+ case ARM::t2STRB_PRE:
+ case ARM::t2STRBs:
+ case ARM::t2STRDi8:
+ case ARM::t2STRH_POST:
+ case ARM::t2STRH_PRE:
+ case ARM::t2STRHs:
+ case ARM::t2STR_POST:
+ case ARM::t2STR_PRE:
+ case ARM::t2STRs:
+ return 2;
+ }
+}
+
+// Return the number of 32-bit words loaded by LDM or stored by STM. If this
+// can't be easily determined return 0 (missing MachineMemOperand).
+//
+// FIXME: The current MachineInstr design does not support relying on machine
+// mem operands to determine the width of a memory access. Instead, we expect
+// the target to provide this information based on the instruction opcode and
+// operands. However, using MachineMemOperand is the best solution now for
+// two reasons:
+//
+// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
+// operands. This is much more dangerous than using the MachineMemOperand
+// sizes because CodeGen passes can insert/remove optional machine operands. In
+// fact, it's totally incorrect for preRA passes and appears to be wrong for
+// postRA passes as well.
+//
+// 2) getNumLDMAddresses is only used by the scheduling machine model and any
+// machine model that calls this should handle the unknown (zero size) case.
+//
+// Long term, we should require a target hook that verifies MachineMemOperand
+// sizes during MC lowering. That target hook should be local to MC lowering
+// because we can't ensure that it is aware of other MI forms. Doing this will
+// ensure that MachineMemOperands are correctly propagated through all passes.
+unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ Size += (*I)->getSize();
+ }
+ return Size / 4;
+}
+
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
@@ -2356,8 +2636,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
int ItinUOps = ItinData->getNumMicroOps(Class);
- if (ItinUOps >= 0)
+ if (ItinUOps >= 0) {
+ if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+ return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
return ItinUOps;
+ }
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2426,7 +2710,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
- if (Subtarget.isCortexA8()) {
+ if (Subtarget.isSwift()) {
+ // rdar://8402126
+ int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
+ switch (Opc) {
+ default: break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+ } else if (Subtarget.isCortexA8()) {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
@@ -2435,7 +2755,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
if (NumRegs % 2)
++A8UOps;
return A8UOps;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2468,7 +2788,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = RegNo / 2 + 1;
if (RegNo % 2)
++DefCycle;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = RegNo;
bool isSLoad = false;
@@ -2512,7 +2832,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = 1;
// Result latency is issue cycle + 2: E2.
DefCycle += 2;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = (RegNo / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2543,7 +2863,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = RegNo / 2 + 1;
if (RegNo % 2)
++UseCycle;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = RegNo;
bool isSStore = false;
@@ -2584,7 +2904,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = 2;
// Read in E3.
UseCycle += 2;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = (RegNo / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2769,7 +3089,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
const MachineInstr *DefMI,
const MCInstrDesc *DefMCID, unsigned DefAlign) {
int Adjust = 0;
- if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) {
+ if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID->getOpcode()) {
@@ -2794,9 +3114,40 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
break;
}
}
+ } else if (Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ Adjust -= 2;
+ else if (!isSub &&
+ ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+ Adjust -= 2;
+ break;
+ }
+ }
}
- if (DefAlign < 8 && Subtarget.isCortexA9()) {
+ if (DefAlign < 8 && Subtarget.isLikeA9()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -2954,7 +3305,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
- return Subtarget.isCortexA9() ? 1 : 20;
+ return Subtarget.isLikeA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
@@ -2970,7 +3321,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (MF->getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize))
--Latency;
}
return Latency;
@@ -3020,7 +3372,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
- if (Subtarget.isCortexA9())
+ if (Subtarget.isLikeA9() || Subtarget.isSwift())
return Latency <= 2 ? 1 : Latency - 1;
else
return Latency <= 3 ? 1 : Latency - 2;
@@ -3037,7 +3389,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
UseMCID, UseIdx, UseAlign);
if (Latency > 1 &&
- (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID.getOpcode()) {
@@ -3064,9 +3416,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
}
+ } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ Latency -= 2;
+ else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl 0-3 only.
+ Latency -= 2;
+ break;
+ }
+ }
}
- if (DefAlign < 8 && Subtarget.isCortexA9())
+ if (DefAlign < 8 && Subtarget.isLikeA9())
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -3190,18 +3569,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
-unsigned
-ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *DepMI) const {
- unsigned Reg = DefMI->getOperand(DefIdx).getReg();
- if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
- return 1;
-
- // If the second MI is predicated, then there is an implicit use dependency.
- return getInstrLatency(ItinData, DefMI);
-}
-
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -3359,11 +3726,12 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
- // Cortex-A9 is particularly picky about mixing the two and wants these
+ // A9-like cores are particularly picky about mixing the two and want these
// converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ if (Subtarget.isLikeA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR))
+ MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
@@ -3383,13 +3751,70 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
return std::make_pair(ExeGeneric, 0);
}
+static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
+ unsigned SReg, unsigned &Lane) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+
+ if (DReg != ARM::NoRegister)
+ return DReg;
+
+ Lane = 1;
+ DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
+
+ assert(DReg && "S-register with no D super-register?");
+ return DReg;
+}
+
+/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
+/// set ImplicitSReg to a register number that must be marked as implicit-use or
+/// zero if no register needs to be defined as implicit-use.
+///
+/// If the function cannot determine if an SPR should be marked implicit use or
+/// not, it returns false.
+///
+/// This function handles cases where an instruction is being modified from taking
+/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
+/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
+/// lane of the DPR).
+///
+/// If the other SPR is defined, an implicit-use of it should be added. Else,
+/// (including the case where the DPR itself is defined), it should not.
+///
+static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
+ MachineInstr *MI,
+ unsigned DReg, unsigned Lane,
+ unsigned &ImplicitSReg) {
+ // If the DPR is defined or used already, the other SPR lane will be chained
+ // correctly, so there is nothing to be done.
+ if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+ ImplicitSReg = 0;
+ return true;
+ }
+
+ // Otherwise we need to go searching to see if the SPR is set explicitly.
+ ImplicitSReg = TRI->getSubReg(DReg,
+ (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
+ MachineBasicBlock::LivenessQueryResult LQR =
+ MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+
+ if (LQR == MachineBasicBlock::LQR_Live)
+ return true;
+ else if (LQR == MachineBasicBlock::LQR_Unknown)
+ return false;
+
+ // If the register is known not to be live, there is no need to add an
+ // implicit-use.
+ ImplicitSReg = 0;
+ return true;
+}
+
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
MachineInstrBuilder MIB(MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
- bool isKill;
switch (MI->getOpcode()) {
default:
llvm_unreachable("cannot handle opcode!");
@@ -3400,82 +3825,294 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- // Change to a VORRd which requires two identical use operands.
- MI->setDesc(get(ARM::VORRd));
+ // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
- // Add the extra source operand and new predicates.
- // This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VORRd));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+ // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
- DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
- Lane = 0;
- if (DReg == ARM::NoRegister) {
- DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
- Lane = 1;
- assert(DReg && "S-register with no D super-register?");
- }
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- MI->RemoveOperand(1);
+ DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
+ // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
+ // Note that DSrc has been widened and the other lane may be undef, which
+ // contaminates the entire register.
MI->setDesc(get(ARM::VGETLNi32));
- MIB.addReg(DReg);
- MIB.addImm(Lane);
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane));
- MIB->getOperand(1).setIsUndef();
+ // The old source should be an implicit use, otherwise we might think it
+ // was dead before here.
MIB.addReg(SrcReg, RegState::Implicit);
-
- AddDefaultPred(MIB);
break;
- case ARM::VMOVSR:
+ case ARM::VMOVSR: {
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+ // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
- DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
- Lane = 0;
- if (DReg == ARM::NoRegister) {
- DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
- Lane = 1;
- assert(DReg && "S-register with no D super-register?");
- }
- isKill = MI->getOperand(0).isKill();
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- MI->RemoveOperand(1);
- MI->RemoveOperand(0);
+ DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
+
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+ // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
+ // Again DDst may be undefined at the beginning of this instruction.
MI->setDesc(get(ARM::VSETLNi32));
- MIB.addReg(DReg);
- MIB.addReg(DReg);
- MIB.addReg(SrcReg);
- MIB.addImm(Lane);
+ MIB.addReg(DReg, RegState::Define)
+ .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
+ .addReg(SrcReg)
+ .addImm(Lane);
+ AddDefaultPred(MIB);
+
+ // The narrower destination must be marked as set to keep previous chains
+ // in place.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+ case ARM::VMOVS: {
+ if (Domain != ExeNEON)
+ break;
+
+ // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+ DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+ DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
- MIB->getOperand(1).setIsUndef();
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
+ break;
- if (isKill)
- MIB->addRegisterKilled(DstReg, TRI, true);
- MIB->addRegisterDefined(DstReg, TRI);
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ if (DSrc == DDst) {
+ // Destination can be:
+ // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VDUPLN32d));
+ MIB.addReg(DDst, RegState::Define)
+ .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
+ .addImm(SrcLane);
+ AddDefaultPred(MIB);
+
+ // Neither the source nor the destination is naturally represented any
+ // more, so add them in manually.
+ MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+ // In general there's no single instruction that can perform an S <-> S
+ // move in NEON space, but a pair of VEXT instructions *can* do the
+ // job. It turns out that the VEXTs needed will only use DSrc once, with
+ // the position based purely on the combination of lane-0 and lane-1
+ // involved. For example
+ // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
+ // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
+ // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
+ // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
+ //
+ // Pattern of the MachineInstrs is:
+ // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+ MachineInstrBuilder NewMIB;
+ NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::VEXTd32), DDst);
+
+ // On the first instruction, both DSrc and DDst may be <undef> if present.
+ // Specifically when the original instruction didn't have them as an
+ // <imp-use>.
+ unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
+ bool CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ NewMIB.addImm(1);
+ AddDefaultPred(NewMIB);
+
+ if (SrcLane == DstLane)
+ NewMIB.addReg(SrcReg, RegState::Implicit);
+
+ MI->setDesc(get(ARM::VEXTd32));
+ MIB.addReg(DDst, RegState::Define);
+
+ // On the second instruction, DDst has definitely been defined above, so
+ // it is not <undef>. DSrc, if present, can be <undef> as above.
+ CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ MIB.addImm(1);
AddDefaultPred(MIB);
+
+ if (SrcLane != DstLane)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ // As before, the original destination is no longer represented, add it
+ // implicitly.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
+ }
+ }
+
+}
+
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+
+
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // Only Swift has partial register update problems.
+ if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ return 0;
+
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.readsReg())
+ return 0;
+ unsigned Reg = MO.getReg();
+ int UseOp = -1;
+
+ switch(MI->getOpcode()) {
+ // Normal instructions writing only an S-register.
+ case ARM::VLDRS:
+ case ARM::FCONSTS:
+ case ARM::VMOVSR:
+ // rdar://problem/8791586
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv1i64:
+ UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ break;
+
+ // Explicitly reads the dependency.
+ case ARM::VLD1LNd32:
+ UseOp = 1;
+ break;
+ default:
+ return 0;
+ }
+
+ // If this instruction actually reads a value from Reg, there is no unwanted
+ // dependency.
+ if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ return 0;
+
+ // We must be able to clobber the whole D-reg.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Virtual register must be a foo:ssub_0<def,undef> operand.
+ if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ // Physical register: MI must define the full D-reg.
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DReg || !MI->definesRegister(DReg, TRI))
+ return 0;
}
+ // MI has an unwanted D-register dependency.
+ // Avoid defs in the previous N instructions.
+ return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Can't break virtual register dependencies.");
+ unsigned DReg = Reg;
+
+ // If MI defines an S-reg, find the corresponding D super-register.
+ if (ARM::SPRRegClass.contains(Reg)) {
+ DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+ assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+ }
+
+ assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+ assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+ // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+ // the full D-register by loading the same value to both lanes. The
+ // instruction is micro-coded with 2 uops, so don't do this until we can
+ // properly schedule micro-coded instructions. The dispatcher stalls cause
+ // regressions that are too large.
+
+ // Insert the dependency-breaking FCONSTD before MI.
+ // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::FCONSTD), DReg).addImm(96));
+ MI->addRegisterKilled(DReg, TRI, true);
}
bool ARMBaseInstrInfo::hasNOP() const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 92e5ee8..6f38e35 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -182,10 +182,13 @@ public:
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles,
const BranchProbability
- &Probability) const {
+ &Probability) const {
return NumCycles == 1;
}
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -226,15 +229,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
- virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *DepMI) const;
-
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+ const TargetRegisterInfo*) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ const TargetRegisterInfo *TRI) const;
+ /// Get the number of addresses accessed by an LDM or VLDM, or zero if
+ /// unknown.
+ unsigned getNumLDMAddresses(const MachineInstr *MI) const;
+
private:
unsigned getInstBundleLength(const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9deb96e..e5b300f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -84,6 +84,11 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
+const uint32_t*
+ARMBaseRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -106,148 +111,12 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned i = 0; i != 16; ++i)
Reserved.set(ARM::D16 + i);
}
- return Reserved;
-}
-
-bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
- unsigned Reg) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- switch (Reg) {
- default: break;
- case ARM::SP:
- case ARM::PC:
- return true;
- case ARM::R6:
- if (hasBasePointer(MF))
- return true;
- break;
- case ARM::R7:
- case ARM::R11:
- if (FramePtr == Reg && TFI->hasFP(MF))
- return true;
- break;
- case ARM::R9:
- return STI.isR9Reserved();
- }
-
- return false;
-}
+ const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+      I != E; ++I)
+ for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI)) Reserved.set(*I);
-bool
-ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const {
-
- unsigned Size = RC->getSize() * 8;
- if (Size < 6)
- return 0;
-
- NewSubIdx = 0; // Whole register.
- unsigned NumRegs = SubIndices.size();
- if (NumRegs == 8) {
- // 8 D registers -> 1 QQQQ register.
- return (Size == 512 &&
- SubIndices[0] == ARM::dsub_0 &&
- SubIndices[1] == ARM::dsub_1 &&
- SubIndices[2] == ARM::dsub_2 &&
- SubIndices[3] == ARM::dsub_3 &&
- SubIndices[4] == ARM::dsub_4 &&
- SubIndices[5] == ARM::dsub_5 &&
- SubIndices[6] == ARM::dsub_6 &&
- SubIndices[7] == ARM::dsub_7);
- } else if (NumRegs == 4) {
- if (SubIndices[0] == ARM::qsub_0) {
- // 4 Q registers -> 1 QQQQ register.
- return (Size == 512 &&
- SubIndices[1] == ARM::qsub_1 &&
- SubIndices[2] == ARM::qsub_2 &&
- SubIndices[3] == ARM::qsub_3);
- } else if (SubIndices[0] == ARM::dsub_0) {
- // 4 D registers -> 1 QQ register.
- if (Size >= 256 &&
- SubIndices[1] == ARM::dsub_1 &&
- SubIndices[2] == ARM::dsub_2 &&
- SubIndices[3] == ARM::dsub_3) {
- if (Size == 512)
- NewSubIdx = ARM::qqsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_4) {
- // 4 D registers -> 1 QQ register (2nd).
- if (Size == 512 &&
- SubIndices[1] == ARM::dsub_5 &&
- SubIndices[2] == ARM::dsub_6 &&
- SubIndices[3] == ARM::dsub_7) {
- NewSubIdx = ARM::qqsub_1;
- return true;
- }
- } else if (SubIndices[0] == ARM::ssub_0) {
- // 4 S registers -> 1 Q register.
- if (Size >= 128 &&
- SubIndices[1] == ARM::ssub_1 &&
- SubIndices[2] == ARM::ssub_2 &&
- SubIndices[3] == ARM::ssub_3) {
- if (Size >= 256)
- NewSubIdx = ARM::qsub_0;
- return true;
- }
- }
- } else if (NumRegs == 2) {
- if (SubIndices[0] == ARM::qsub_0) {
- // 2 Q registers -> 1 QQ register.
- if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
- if (Size == 512)
- NewSubIdx = ARM::qqsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::qsub_2) {
- // 2 Q registers -> 1 QQ register (2nd).
- if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
- NewSubIdx = ARM::qqsub_1;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_0) {
- // 2 D registers -> 1 Q register.
- if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
- if (Size >= 256)
- NewSubIdx = ARM::qsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_2) {
- // 2 D registers -> 1 Q register (2nd).
- if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
- NewSubIdx = ARM::qsub_1;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_4) {
- // 2 D registers -> 1 Q register (3rd).
- if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
- NewSubIdx = ARM::qsub_2;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_6) {
- // 2 D registers -> 1 Q register (3rd).
- if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
- NewSubIdx = ARM::qsub_3;
- return true;
- }
- } else if (SubIndices[0] == ARM::ssub_0) {
- // 2 S registers -> 1 D register.
- if (SubIndices[1] == ARM::ssub_1) {
- if (Size >= 128)
- NewSubIdx = ARM::dsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::ssub_2) {
- // 2 S registers -> 1 D register (2nd).
- if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
- NewSubIdx = ARM::dsub_1;
- return true;
- }
- }
- }
- return false;
+ return Reserved;
}
const TargetRegisterClass*
@@ -263,6 +132,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
case ARM::QPRRegClassID:
case ARM::QQPRRegClassID:
case ARM::QQQQPRRegClassID:
+ case ARM::GPRPairRegClassID:
return Super;
}
Super = *I++;
@@ -476,7 +346,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
bool
ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
// CortexA9 has a Write-after-write hazard for NEON registers.
- if (!STI.isCortexA9())
+ if (!STI.isLikeA9())
return false;
switch (RC->getID()) {
@@ -561,8 +431,9 @@ needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -595,6 +466,7 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
switch (Reg) {
default: break;
// Return 0 if either register of the pair is a special register.
@@ -603,10 +475,10 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
case ARM::R3: return ARM::R2;
case ARM::R5: return ARM::R4;
case ARM::R7:
- return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
? 0 : ARM::R6;
- case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
- case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+ case ARM::R9: return MRI.isReserved(ARM::R9) ? 0 : ARM::R8;
+ case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10;
case ARM::S1: return ARM::S0;
case ARM::S3: return ARM::S2;
@@ -648,6 +520,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
switch (Reg) {
default: break;
// Return 0 if either register of the pair is a special register.
@@ -656,10 +529,10 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
case ARM::R2: return ARM::R3;
case ARM::R4: return ARM::R5;
case ARM::R6:
- return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
? 0 : ARM::R7;
- case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
- case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+ case ARM::R8: return MRI.isReserved(ARM::R9) ? 0 : ARM::R9;
+ case ARM::R10: return MRI.isReserved(ARM::R11) ? 0 : ARM::R11;
case ARM::S0: return ARM::S1;
case ARM::S2: return ARM::S3;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index da29f7e..e2bdd04 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -96,19 +96,10 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- /// canCombineSubRegIndices - Given a register class and a list of
- /// subregister indices, return true if it's possible to combine the
- /// subregister indices into one that corresponds to a larger
- /// subregister. Return the new subregister index by reference. Note the
- /// new index may be zero if the given subregisters can be combined to
- /// form the whole register.
- virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const;
-
const TargetRegisterClass*
getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
const TargetRegisterClass*
@@ -170,8 +161,6 @@ public:
unsigned MIFlags = MachineInstr::NoFlags)const;
/// Code Generation virtual methods...
- virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
-
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index bda1517..b378b96 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -190,6 +190,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Callee-saved register lists.
//===----------------------------------------------------------------------===//
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index e81b4cc..6adbf4f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -47,7 +47,7 @@ namespace {
class ARMCodeEmitter : public MachineFunctionPass {
ARMJITInfo *JTI;
const ARMBaseInstrInfo *II;
- const TargetData *TD;
+ const DataLayout *TD;
const ARMSubtarget *Subtarget;
TargetMachine &TM;
JITCodeEmitter &MCE;
@@ -67,7 +67,7 @@ namespace {
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
: MachineFunctionPass(ID), JTI(0),
II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
- TD(tm.getTargetData()), TM(tm),
+ TD(tm.getDataLayout()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
@@ -376,7 +376,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
"JIT relocation model must be set to static or default!");
JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
- TD = MF.getTarget().getTargetData();
+ TD = MF.getTarget().getDataLayout();
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -389,7 +389,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
do {
DEBUG(errs() << "JITTing function '"
- << MF.getFunction()->getName() << "'\n");
+ << MF.getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index a953985..a57368f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -528,7 +528,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF->getTarget().getTargetData();
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -1388,10 +1388,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// If the original WaterList entry was "new water" on this iteration,
// propagate that to the new island. This is just keeping NewWaterList
// updated to match the WaterList, which will be updated below.
- if (NewWaterList.count(WaterBB)) {
- NewWaterList.erase(WaterBB);
+ if (NewWaterList.erase(WaterBB))
NewWaterList.insert(NewIsland);
- }
+
// The new CPE goes before the following block (NewMBB).
NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6b98d44..ae531c4 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -102,8 +102,6 @@ public:
virtual void print(raw_ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
void dump() const;
-
- static bool classof(const ARMConstantPoolValue *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
@@ -158,7 +156,6 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
- static bool classof(const ARMConstantPoolConstant *) { return true; }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -192,7 +189,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
- static bool classof(const ARMConstantPoolSymbol *) { return true; }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -225,7 +221,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
- static bool classof(const ARMConstantPoolMBB *) { return true; }
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
deleted file mode 100644
index f671317..0000000
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARMELFWriterInfo.h"
-#include "ARMRelocations.h"
-#include "llvm/Function.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ELF.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Implementation of the ARMELFWriterInfo class
-//===----------------------------------------------------------------------===//
-
-ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
- TM.getTargetData()->isLittleEndian()) {
-}
-
-ARMELFWriterInfo::~ARMELFWriterInfo() {}
-
-unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
- switch (MachineRelTy) {
- case ARM::reloc_arm_absolute:
- case ARM::reloc_arm_relative:
- case ARM::reloc_arm_cp_entry:
- case ARM::reloc_arm_vfp_cp_entry:
- case ARM::reloc_arm_machine_cp_entry:
- case ARM::reloc_arm_jt_base:
- case ARM::reloc_arm_pic_jt:
- llvm_unreachable("unsupported ARM relocation type");
-
- case ARM::reloc_arm_branch: return ELF::R_ARM_CALL;
- case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS;
- case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC;
- default:
- llvm_unreachable("unknown ARM relocation type");
- }
-}
-
-long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier) const {
- llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not "
- "implemented");
-}
-
-unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
- llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented");
-}
-
-bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
- llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented");
-}
-
-unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
- llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
- "implemented");
-}
-
-long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
- unsigned RelOffset,
- unsigned RelTy) const {
- llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
- "implemented");
-}
diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h
deleted file mode 100644
index 6a84f8a..0000000
--- a/lib/Target/ARM/ARMELFWriterInfo.h
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_ELF_WRITER_INFO_H
-#define ARM_ELF_WRITER_INFO_H
-
-#include "llvm/Target/TargetELFWriterInfo.h"
-
-namespace llvm {
- class TargetMachine;
-
- class ARMELFWriterInfo : public TargetELFWriterInfo {
- public:
- ARMELFWriterInfo(TargetMachine &TM);
- virtual ~ARMELFWriterInfo();
-
- /// getRelocationType - Returns the target specific ELF Relocation type.
- /// 'MachineRelTy' contains the object code independent relocation type
- virtual unsigned getRelocationType(unsigned MachineRelTy) const;
-
- /// hasRelocationAddend - True if the target uses an addend in the
- /// ELF relocation entry.
- virtual bool hasRelocationAddend() const { return false; }
-
- /// getDefaultAddendForRelTy - Gets the default addend value for a
- /// relocation entry based on the target ELF relocation type.
- virtual long int getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier = 0) const;
-
- /// getRelTySize - Returns the size of relocatable field in bits
- virtual unsigned getRelocationTySize(unsigned RelTy) const;
-
- /// isPCRelativeRel - True if the relocation type is pc relative
- virtual bool isPCRelativeRel(unsigned RelTy) const;
-
- /// getJumpTableRelocationTy - Returns the machine relocation type used
- /// to reference a jumptable.
- virtual unsigned getAbsoluteLabelMachineRelTy() const;
-
- /// computeRelocation - Some relocatable fields could be relocated
- /// directly, avoiding the relocation symbol emission, compute the
- /// final relocation value for this symbol.
- virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
- unsigned RelTy) const;
- };
-
-} // end llvm namespace
-
-#endif // ARM_ELF_WRITER_INFO_H
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 15bb32e..8c45e0b 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -103,9 +103,9 @@ namespace {
bool IsLoad;
bool isUpdating;
bool hasWritebackOperand;
- NEONRegSpacing RegSpacing;
- unsigned char NumRegs; // D registers loaded or stored
- unsigned char RegElts; // elements per D register; used for lane ops
+ uint8_t RegSpacing; // One of the NEONRegSpacing values
+ uint8_t NumRegs; // D registers loaded or stored
+ uint8_t RegElts; // elements per D register; used for lane ops
// FIXME: Temporary flag to denote whether the real instruction takes
// a single register (like the encoding) or all of the registers in
// the list (like the asm syntax and the isel DAG). When all definitions
@@ -377,7 +377,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
- NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -442,7 +442,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
- NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -493,7 +493,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && "NEONLdStTable lookup failed");
- NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
unsigned RegElts = TableEntry->RegElts;
@@ -777,9 +777,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::Int_eh_sjlj_dispatchsetup:
- case ARM::Int_eh_sjlj_dispatchsetup_nofp:
- case ARM::tInt_eh_sjlj_dispatchsetup: {
+ case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index bf9d16e..6611862 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -40,7 +40,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -100,51 +100,53 @@ class ARMFastISel : public FastISel {
}
// Code from FastISel.cpp.
- virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC);
- virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill);
- virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
- virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill);
- virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm);
- virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm);
- virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm1, uint64_t Imm2);
-
- virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, bool Op0IsKill,
- uint32_t Idx);
+ private:
+ unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
// Backend specific FastISel code.
+ private:
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
-
+ private:
#include "ARMGenFastISel.inc"
// Instruction selection routines.
@@ -192,6 +194,7 @@ class ARMFastISel : public FastISel {
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);
// Call handling routines.
private:
@@ -615,11 +618,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
if (VT != MVT::i32) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
-
- // TODO: Need more magic for ARM PIC.
- if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned DestReg = createResultReg(RC);
// Use movw+movt when possible, it avoids constant pool entries.
// Darwin targets don't support movt with Reloc::Static, see
@@ -649,6 +652,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
Align = TD.getTypeAllocSize(GV->getType());
}
+ if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ return ARMLowerPICELF(GV, Align, VT);
+
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
(Subtarget->isThumb() ? 4 : 8);
@@ -666,17 +672,30 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
.addConstantPoolIndex(Idx);
if (RelocM == Reloc::PIC_)
MIB.addImm(Id);
+ AddOptionalDefs(MIB);
} else {
// The extra immediate is for addrmode2.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
.addImm(0);
+ AddOptionalDefs(MIB);
+
+ if (RelocM == Reloc::PIC_) {
+ unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), NewDestReg)
+ .addReg(DestReg)
+ .addImm(Id);
+ AddOptionalDefs(MIB);
+ return NewDestReg;
+ }
}
- AddOptionalDefs(MIB);
}
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ if (IsIndirect) {
MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
@@ -1009,6 +1028,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = &ARM::GPRRegClass;
break;
case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
@@ -1021,6 +1043,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = &ARM::GPRRegClass;
break;
case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
Opc = ARM::t2LDRi8;
@@ -1127,6 +1152,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
StrOpc = ARM::t2STRHi8;
@@ -1138,6 +1166,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
StrOpc = ARM::t2STRi8;
@@ -1360,6 +1391,11 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(AddrReg));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
return true;
}
@@ -2210,25 +2246,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
- if (isThumb2) {
- // Explicitly adding the predicate here.
+ // BL / BLX don't take a predicate, but tBL / tBLX do.
+ if (isThumb2)
AddDefaultPred(MIB);
- if (EnableARMLongCalls)
- MIB.addReg(CalleeReg);
- else
- MIB.addExternalSymbol(TLI.getLibcallName(Call));
- } else {
- if (EnableARMLongCalls)
- MIB.addReg(CalleeReg);
- else
- MIB.addExternalSymbol(TLI.getLibcallName(Call));
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
- // Explicitly adding the predicate here.
- AddDefaultPred(MIB);
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i]);
+ MIB.addReg(RegArgs[i], RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2300,16 +2328,16 @@ bool ARMFastISel::SelectCall(const Instruction *I,
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ if (CS.paramHasAttr(AttrInd, Attributes::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ if (CS.paramHasAttr(AttrInd, Attributes::InReg) ||
+ CS.paramHasAttr(AttrInd, Attributes::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attributes::Nest) ||
+ CS.paramHasAttr(AttrInd, Attributes::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2356,30 +2384,20 @@ bool ARMFastISel::SelectCall(const Instruction *I,
unsigned CallOpc = ARMSelectCallOp(UseReg);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
- if(isThumb2) {
- // Explicitly adding the predicate here.
- AddDefaultPred(MIB);
- if (UseReg)
- MIB.addReg(CalleeReg);
- else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
- } else {
- if (UseReg)
- MIB.addReg(CalleeReg);
- else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
- // Explicitly adding the predicate here.
+ // ARM calls don't take a predicate, but tBL / tBLX do.
+ if (isThumb2)
AddDefaultPred(MIB);
- }
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
+ else
+ MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i]);
+ MIB.addReg(RegArgs[i], RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2648,7 +2666,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
unsigned Reg1 = getRegForValue(Src1Value);
if (Reg1 == 0) return false;
- unsigned Reg2;
+ unsigned Reg2 = 0;
if (Opc == ARM::MOVsr) {
Reg2 = getRegForValue(Src2Value);
if (Reg2 == 0) return false;
@@ -2790,6 +2808,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
return true;
}
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
+ unsigned Align, EVT VT) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolConstant *CPV =
+ ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ unsigned Opc;
+ unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
+ // Load value.
+ if (isThumb2) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg1)
+ .addConstantPoolIndex(Idx));
+ Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
+ } else {
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(ARM::LDRcp), DestReg1)
+ .addConstantPoolIndex(Idx).addImm(0));
+ Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
+ }
+
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ if (GlobalBaseReg == 0) {
+ GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
+ AFI->setGlobalBaseReg(GlobalBaseReg);
+ }
+
+ unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), DestReg2)
+ .addReg(DestReg1)
+ .addReg(GlobalBaseReg);
+ if (!UseGOTOFF)
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+
+ return DestReg2;
+}
+
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index aee72d2..9392497 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -153,7 +153,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
int D8SpillFI = 0;
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
@@ -360,7 +361,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
@@ -1151,7 +1153,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
@@ -1176,7 +1178,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumSpills = 0;
for (; NumSpills < 8; ++NumSpills)
- if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
break;
// Don't do this for just one d-register. It's not worth it.
@@ -1209,6 +1211,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
*static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
@@ -1218,12 +1221,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
if (AFI->isThumb2Function() &&
(MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ MRI.setPhysRegUsed(ARM::R4);
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
if (AFI->getVarArgsRegSaveSize() > 0)
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ MRI.setPhysRegUsed(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
// for sure what the stack size will be, but for this, an estimate is good
@@ -1233,7 +1236,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
unsigned StackSize = estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ MRI.setPhysRegUsed(ARM::R4);
}
// See if we can spill vector registers to aligned stack.
@@ -1241,7 +1244,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+ MRI.setPhysRegUsed(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
@@ -1249,7 +1252,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (MRI.isPhysRegUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
}
@@ -1338,7 +1341,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
if (!LRSpilled && CS1Spilled) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ MRI.setPhysRegUsed(ARM::LR);
NumGPRSpills++;
UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
@@ -1347,7 +1350,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (hasFP(MF)) {
- MF.getRegInfo().setPhysRegUsed(FramePtr);
+ MRI.setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -1362,16 +1365,16 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill high register if the function is thumb1
if (!AFI->isThumb1OnlyFunction() ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
- MF.getRegInfo().setPhysRegUsed(Reg);
- if (!RegInfo->isReservedReg(MF, Reg))
+ MRI.setPhysRegUsed(Reg);
+ if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
break;
}
}
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
unsigned Reg = UnspilledCS2GPRs.front();
- MF.getRegInfo().setPhysRegUsed(Reg);
- if (!RegInfo->isReservedReg(MF, Reg))
+ MRI.setPhysRegUsed(Reg);
+ if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
}
}
@@ -1389,7 +1392,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
while (NumExtras && !UnspilledCS1GPRs.empty()) {
unsigned Reg = UnspilledCS1GPRs.back();
UnspilledCS1GPRs.pop_back();
- if (!RegInfo->isReservedReg(MF, Reg) &&
+ if (!MRI.isReserved(Reg) &&
(!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
Reg == ARM::LR)) {
Extras.push_back(Reg);
@@ -1401,7 +1404,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
while (NumExtras && !UnspilledCS2GPRs.empty()) {
unsigned Reg = UnspilledCS2GPRs.back();
UnspilledCS2GPRs.pop_back();
- if (!RegInfo->isReservedReg(MF, Reg)) {
+ if (!MRI.isReserved(Reg)) {
Extras.push_back(Reg);
NumExtras--;
}
@@ -1409,7 +1412,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (Extras.size() && NumExtras == 0) {
for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ MRI.setPhysRegUsed(Extras[i]);
}
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
@@ -1423,7 +1426,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (ForceLRSpill) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ MRI.setPhysRegUsed(ARM::LR);
AFI->setLRIsSpilledForFarJump(true);
}
}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index a5fd15b..1240169 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -47,7 +47,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
+ !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a3a6c31..efd6d2b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -239,7 +239,6 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
- SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -306,7 +305,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}
/// \brief Check whether a particular node is a constant value representable as
-/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
+/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
@@ -337,7 +336,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (!CheckVMLxHazard)
return true;
- if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
+ !Subtarget->isSwift())
return true;
if (!N->hasOneUse())
@@ -375,12 +375,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal,
unsigned ShAmt) {
- if (!Subtarget->isCortexA9())
+ if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
return true;
if (Shift.hasOneUse())
return true;
// R << 2 is free.
- return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+ return ShOpcVal == ARM_AM::lsl &&
+ (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
@@ -487,7 +488,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -551,7 +552,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -585,7 +587,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -651,7 +653,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
}
- if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -689,7 +691,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -2363,121 +2366,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
-SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
- SDValue FalseVal = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- ARMCC::CondCodes CCVal =
- (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- SDValue CCR = N->getOperand(3);
- assert(CCR.getOpcode() == ISD::Register);
- SDValue InFlag = N->getOperand(4);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-
- if (Subtarget->isThumb()) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
- }
- SDValue Ops[] = {
- FalseVal, FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag
- };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
- }
-
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (T) {
- unsigned TrueImm = T->getZExtValue();
- if (is_t2_so_imm(TrueImm)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
- }
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
- }
-
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
- }
- SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
-
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
- case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
- }
- SDValue Ops[] = {
- FalseVal, FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag
- };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
- }
-
- if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
- case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
- }
- SDValue Ops[] = {
- FalseVal, FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag
- };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 9);
- }
-
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (T) {
- unsigned TrueImm = T->getZExtValue();
- if (is_so_imm(TrueImm)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCri; break;
- case ARMISD::COR: Opc = ARM::ORRCCri; break;
- case ARMISD::CXOR: Opc = ARM::EORCCri; break;
- }
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
- }
-
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
- case ARMISD::COR: Opc = ARM::ORRCCrr; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
- }
- SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
-}
-
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2753,6 +2641,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
dl, MVT::i32, MVT::i32, Ops, 5);
}
}
+ case ARMISD::UMLAL: {
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32,
+                               Ops, 6);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMLAL : ARM::UMLALv5,
+ dl, MVT::i32, MVT::i32, Ops, 7);
+ }
+ }
+ case ARMISD::SMLAL: {
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32,
+                               Ops, 6);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMLAL : ARM::SMLALv5,
+ dl, MVT::i32, MVT::i32, Ops, 7);
+ }
+ }
case ISD::LOAD: {
SDNode *ResNode = 0;
if (Subtarget->isThumb() && Subtarget->hasThumb2())
@@ -2805,10 +2725,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
- case ARMISD::CAND:
- case ARMISD::COR:
- case ARMISD::CXOR:
- return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 190ca07..ff99b04 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
@@ -514,6 +515,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -566,6 +568,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
}
+ // ARM and Thumb2 support UMLAL/SMLAL.
+ if (!Subtarget->isThumb1Only())
+ setTargetDAGCombine(ISD::ADDC);
+
+
computeRegisterProperties();
// ARM does not have f32 extending load.
@@ -629,9 +636,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- // These are expanded into libcalls.
- if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
- // v7M has a hardware divider
+ if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
+ !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+    // These are expanded into libcalls if the CPU doesn't have a hardware divider.
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
@@ -791,12 +798,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
-
- if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
- }
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
@@ -821,7 +825,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isCortexA9();
+ predictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -898,9 +902,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::CAND: return "ARMISD::CAND";
- case ARMISD::COR: return "ARMISD::COR";
- case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -984,6 +985,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMLAL: return "ARMISD::UMLAL";
+ case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
@@ -1591,19 +1594,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
+ bool HasMinSizeAttr = MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- else if (doesNotRet && isDirect && !isARMFunc &&
- Subtarget->hasRAS() && !Subtarget->isThumb1Only())
- // "mov lr, pc; b _foo" to avoid confusing the RSP
- CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
- if (!isDirect && !Subtarget->hasV5TOps()) {
+ if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- } else if (doesNotRet && isDirect && Subtarget->hasRAS())
+ else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ // Emit regular call when code size is the priority
+ !HasMinSizeAttr)
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -1653,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// and then confiscate the rest of the parameter registers to insure
/// this.
void
-ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(
+ CCState *State, unsigned &size, unsigned Align) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
- State->setFirstByValReg(reg);
- // At a call site, a byval parameter that is split between
- // registers and memory needs its size truncated here. In a
- // function prologue, such byval parameters are reassembled in
- // memory, and are not truncated.
- if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ if (Subtarget->isAAPCS_ABI() && Align > 4) {
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ reg = State->AllocateReg(GPRArgRegs, 4);
+ }
+ if (reg != 0) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
}
}
// Confiscate any remaining parameter registers to preclude their
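A minimal sketch of the AAPCS register-alignment rule added above (a standalone illustration, not the LLVM API; it assumes Align > 4 as guarded in HandleByVal): when a byval argument needs 8-byte alignment and the next free GPR is r1, one register is skipped so the argument starts in an even register.

// Sketch only: NextFreeReg is 0..3 for r0..r3, AlignInBytes is 8 in the
// interesting AAPCS case. Returns the register the byval argument starts in.
unsigned firstByValReg(unsigned NextFreeReg, unsigned AlignInBytes) {
  unsigned AlignInRegs = AlignInBytes / 4;          // 8-byte alignment -> 2 regs
  unsigned Waste = (4 - NextFreeReg) % AlignInRegs; // registers to skip
  return NextFreeReg + Waste;                       // e.g. r1 -> r2
}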
@@ -1801,6 +1813,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ // If Caller's vararg or byval argument has been split between registers and
+ // stack, do not perform tail call, since part of the argument is in caller's
+ // local frame.
+ const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+ getInfo<ARMFunctionInfo>();
+ if (AFI_Caller->getVarArgsRegSaveSize())
+ return false;
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
@@ -2532,7 +2552,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
- unsigned ArgOffset) const {
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2559,7 +2582,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2570,7 +2593,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
@@ -2581,7 +2604,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+ AFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
}
SDValue
@@ -2604,14 +2628,16 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
-
+
SmallVector<SDValue, 16> ArgValues;
int lastInsIndex = -1;
-
SDValue ArgValue;
+ Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
-
+ std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@@ -2705,14 +2731,20 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
- unsigned Bytes = Flags.getByValSize() - VARegSize;
- if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
- int FI = MFI->CreateFixedObject(Bytes,
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->getVarArgsFrameIndex()) {
+ VarArgStyleRegisters(CCInfo, DAG,
+ dl, Chain, CurOrigArg,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ int VAFrameIndex = AFI->getVarArgsFrameIndex();
+ InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ }
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
@@ -2730,7 +2762,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// varargs
if (isVarArg)
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ CCInfo.getNextStackOffset());
return Chain;
}
@@ -3890,6 +3923,36 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+// Check if a VEXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are the same.
+static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ return true;
+}
+
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
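A standalone sketch of the new check with a worked example (illustration only, not the LLVM code): the mask {1, 2, 3, 0} on a 4-element vector starts at lane 1 and each later index is the previous one plus one, wrapping at the end, so the shuffle can be selected as a VEXT of the vector with itself and immediate 1.

#include <vector>

// Same idea as isSingletonVEXTMask above, written as a standalone helper.
bool isSingletonVEXT(const std::vector<int> &M, unsigned NumElts, unsigned &Imm) {
  if (M.empty() || M[0] < 0)
    return false;                        // the first index must not be UNDEF
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    Expected = (Expected + 1) % NumElts; // successive lanes, wrapping to 0
    if (M[i] >= 0 && unsigned(M[i]) != Expected)
      return false;
  }
  return true;                           // e.g. {1,2,3,0} -> true, Imm == 1
}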
@@ -4157,10 +4220,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Scan through the operands to see if only one value is used.
+ //
+  // profitable to splat with one value and then change some lanes.
+ // profitable to splat with one value then change some lanes.
+ //
+ // Heuristically we decide to do this if the vector has a "dominant" value,
+ // defined as splatted to more than half of the lanes.
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
@@ -4171,13 +4245,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
- if (!Value.getNode())
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
Value = V;
- else if (V != Value)
- usesOnlyOneValue = false;
+ }
}
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
- if (!Value.getNode())
+ if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
if (isOnlyLowElement)
@@ -4187,9 +4269,51 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (hasDominantValue && EltSize <= 32) {
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are VDUPing a value that comes directly from a vector, that will
+ // cause an unnecessary move to and from a GPR, where instead we could
+ // just use VDUPLANE.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // We need to create a new undef vector to use for the VDUPLANE if the
+ // size of the vector from which we get the value is different than the
+ // size of the vector that we need to create. We will insert the element
+ // such that the register coalescer will remove unnecessary copies.
+ if (VT != Value->getOperand(0).getValueType()) {
+ ConstantSDNode *constIndex;
+ constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+ assert(constIndex && "The index is not a constant!");
+ unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+ VT.getVectorNumElements();
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+ Value, DAG.getConstant(index, MVT::i32)),
+ DAG.getConstant(index, MVT::i32));
+ } else {
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ }
+ }
+ else
+ N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i32));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
@@ -4201,9 +4325,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
- if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ if (usesOnlyOneValue) {
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (isConstant && Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
}
// If all elements are constants and the case above didn't get hit, fall back
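A hedged C-level illustration of the dominant-value path above (a made-up function assuming a NEON target): a v4i32 vector with the same value in three of four lanes is now built as a single VDUP plus one lane insert, rather than four separate inserts. The intrinsics below spell out the equivalent of what the new lowering emits.

#include <arm_neon.h>

// The dominant value x is splatted first, then the one differing lane is set.
uint32x4_t splatThenFix(uint32_t x, uint32_t y) {
  uint32x4_t v = vdupq_n_u32(x);   // VDUP of the dominant value
  return vsetq_lane_u32(y, v, 3);  // insert the differing element into lane 3
}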
@@ -4586,6 +4712,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+ if (V2->getOpcode() == ISD::UNDEF &&
+ isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
// Check for Neon shuffles that modify both input vectors in place.
// If both results are used, i.e., if there are two shuffles with the same
// source operands and with masks corresponding to both results of one of
@@ -5421,7 +5553,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5532,7 +5664,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -5546,7 +5678,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// ldrex dest, ptr
// (sign extend dest, if required)
// cmp dest, incr
- // cmov.cond scratch2, dest, incr
+ // cmov.cond scratch2, incr, dest
// strex scratch, scratch2, ptr
// cmp scratch, #0
// bne- loopMBB
@@ -5569,7 +5701,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(oldval).addReg(incr));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+ .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
if (strOpc == ARM::t2STREX)
@@ -5939,12 +6071,15 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineMemOperand::MOLoad |
MachineMemOperand::MOVolatile, 4, 4);
- if (AFI->isThumb1OnlyFunction())
- BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
- else if (!Subtarget->hasVFP2())
- BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
- else
- BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ // Add a register mask with no preserved registers. This results in all
+ // registers being marked as clobbered.
+ MIB.addRegMask(RI.getNoPreservedMask());
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
@@ -6016,9 +6151,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -6105,9 +6240,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -6154,18 +6289,15 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
}
// Add the jump table entries as successors to the MBB.
- MachineBasicBlock *PrevMBB = 0;
+ SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (PrevMBB != CurMBB)
+ if (SeenMBBs.insert(CurMBB))
DispContBB->addSuccessor(CurMBB);
- PrevMBB = CurMBB;
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
- const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
- const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
@@ -6279,7 +6411,8 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->getFnAttributes().
+ hasAttribute(Attributes::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -6364,7 +6497,8 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
} else {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+ .addReg(srcOut, RegState::Define).addReg(srcIn)
+ .addReg(0).addImm(1));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
@@ -6427,9 +6561,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
@@ -6981,73 +7115,131 @@ static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
return AllOnes ? C->isAllOnesValue() : C->isNullValue();
}
+// Return true if N is conditionally 0 or all ones.
+// Detects these expressions where cc is an i1 value:
+//
+// (select cc 0, y) [AllOnes=0]
+// (select cc y, 0) [AllOnes=0]
+// (zext cc) [AllOnes=0]
+// (sext cc) [AllOnes=0/1]
+// (select cc -1, y) [AllOnes=1]
+// (select cc y, -1) [AllOnes=1]
+//
+// Invert is set when N is the null/all ones constant when CC is false.
+// OtherOp is set to the alternative value of N.
+static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
+ SDValue &CC, bool &Invert,
+ SDValue &OtherOp,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: return false;
+ case ISD::SELECT: {
+ CC = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ if (isZeroOrAllOnes(N1, AllOnes)) {
+ Invert = false;
+ OtherOp = N2;
+ return true;
+ }
+ if (isZeroOrAllOnes(N2, AllOnes)) {
+ Invert = true;
+ OtherOp = N1;
+ return true;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ // (zext cc) can never be the all ones value.
+ if (AllOnes)
+ return false;
+ // Fall through.
+ case ISD::SIGN_EXTEND: {
+ EVT VT = N->getValueType(0);
+ CC = N->getOperand(0);
+ if (CC.getValueType() != MVT::i1)
+ return false;
+ Invert = !AllOnes;
+ if (AllOnes)
+ // When looking for an AllOnes constant, N is an sext, and the 'other'
+ // value is 0.
+ OtherOp = DAG.getConstant(0, VT);
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ // When looking for a 0 constant, N can be zext or sext.
+ OtherOp = DAG.getConstant(1, VT);
+ else
+ OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ return true;
+ }
+ }
+}
+
// Combine a constant select operand into its use:
//
-// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
-// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
+// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
//
// The transform is rejected if the select doesn't have a constant operand that
-// is null.
+// is null, or all ones when AllOnes is set.
+//
+// Also recognize sext/zext from i1:
+//
+// (add (zext cc), x) -> (select cc (add x, 1), x)
+// (add (sext cc), x) -> (select cc (add x, -1), x)
+//
+// These transformations eventually create predicated instructions.
//
// @param N The node to transform.
// @param Slct The N operand that is a select.
// @param OtherOp The other N operand (x above).
// @param DCI Context.
+// @param AllOnes Require the select constant to be all ones instead of null.
// @returns The new node, or SDValue() on failure.
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool AllOnes = false) {
SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
- unsigned Opc = N->getOpcode();
- bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
- SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
- SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
- ISD::CondCode CC = ISD::SETCC_INVALID;
-
- if (isSlctCC) {
- CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
- } else {
- SDValue CCOp = Slct.getOperand(0);
- if (CCOp.getOpcode() == ISD::SETCC)
- CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
- }
-
- bool DoXform = false;
- bool InvCC = false;
- assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
- "Bad input!");
+ SDValue NonConstantVal;
+ SDValue CCOp;
+ bool SwapSelectOps;
+ if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+ NonConstantVal, DAG))
+ return SDValue();
- if (isZeroOrAllOnes(LHS, false)) {
- DoXform = true;
- } else if (CC != ISD::SETCC_INVALID && isZeroOrAllOnes(RHS, false)) {
- std::swap(LHS, RHS);
- SDValue Op0 = Slct.getOperand(0);
- EVT OpVT = isSlctCC ? Op0.getValueType() : Op0.getOperand(0).getValueType();
- bool isInt = OpVT.isInteger();
- CC = ISD::getSetCCInverse(CC, isInt);
+  // Slct is now known to be the desired identity constant when CC is true.
+ SDValue TrueVal = OtherOp;
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ OtherOp, NonConstantVal);
+ // Unless SwapSelectOps says CC should be false.
+ if (SwapSelectOps)
+ std::swap(TrueVal, FalseVal);
- if (!TLI.isCondCodeLegal(CC, OpVT))
- return SDValue(); // Inverse operator isn't legal.
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, TrueVal, FalseVal);
+}
- DoXform = true;
- InvCC = true;
+// Attempt combineSelectAndUse on each operand of a commutative operator N.
+static
+SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
}
-
- if (!DoXform)
- return SDValue();
-
- SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
- if (isSlctCC)
- return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
- Slct.getOperand(0), Slct.getOperand(1), CC);
- SDValue CCOp = Slct.getOperand(0);
- if (InvCC)
- CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
- CCOp.getOperand(0), CCOp.getOperand(1), CC);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
- CCOp, OtherOp, Result);
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ return SDValue();
}
// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
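The rewritten combine above is what later allows the arithmetic itself to be predicated instead of materialising a conditional 0/-1 operand. A rough source-level illustration (a hypothetical function, not from the patch):

// (add (select cc, 0, c), x) -> (select cc, x, (add x, c)); the add can then
// be emitted as a single predicated add rather than a cmov/branch sequence.
int addUnlessCC(bool cc, int c, int x) {
  return x + (cc ? 0 : c);
}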
@@ -7139,6 +7331,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
}
+static SDValue findMUL_LOHI(SDValue V) {
+ if (V->getOpcode() == ISD::UMUL_LOHI ||
+ V->getOpcode() == ISD::SMUL_LOHI)
+ return V;
+ return SDValue();
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+
+ // Look for multiply add opportunities.
+  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
+  // each add node consumes a value from ISD::UMUL_LOHI and there is
+  // a glue link from the first add to the second add.
+  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
+  // an S/UMLAL instruction.
+ // loAdd UMUL_LOHI
+ // \ / :lo \ :hi
+ // \ / \ [no multiline comment]
+ // ADDC | hiAdd
+ // \ :glue / /
+ // \ / /
+ // ADDE
+ //
+ assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ SDValue AddcOp0 = AddcNode->getOperand(0);
+ SDValue AddcOp1 = AddcNode->getOperand(1);
+
+ // Check if the two operands are from the same mul_lohi node.
+ if (AddcOp0.getNode() == AddcOp1.getNode())
+ return SDValue();
+
+ assert(AddcNode->getNumValues() == 2 &&
+ AddcNode->getValueType(0) == MVT::i32 &&
+ AddcNode->getValueType(1) == MVT::Glue &&
+ "Expect ADDC with two result values: i32, glue");
+
+ // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+ if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode* AddeNode = AddcNode->getGluedUser();
+ if (AddeNode == NULL)
+ return SDValue();
+
+ // Make sure it is really an ADDE.
+ if (AddeNode->getOpcode() != ISD::ADDE)
+ return SDValue();
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::Glue &&
+ "ADDE node has the wrong inputs");
+
+ // Check for the triangle shape.
+ SDValue AddeOp0 = AddeNode->getOperand(0);
+ SDValue AddeOp1 = AddeNode->getOperand(1);
+
+ // Make sure that the ADDE operands are not coming from the same node.
+ if (AddeOp0.getNode() == AddeOp1.getNode())
+ return SDValue();
+
+ // Find the MUL_LOHI node walking up ADDE's operands.
+ bool IsLeftOperandMUL = false;
+ SDValue MULOp = findMUL_LOHI(AddeOp0);
+ if (MULOp == SDValue())
+ MULOp = findMUL_LOHI(AddeOp1);
+ else
+ IsLeftOperandMUL = true;
+ if (MULOp == SDValue())
+ return SDValue();
+
+ // Figure out the right opcode.
+ unsigned Opc = MULOp->getOpcode();
+ unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
+
+ // Figure out the high and low input values to the MLAL node.
+ SDValue* HiMul = &MULOp;
+ SDValue* HiAdd = NULL;
+ SDValue* LoMul = NULL;
+ SDValue* LowAdd = NULL;
+
+ if (IsLeftOperandMUL)
+ HiAdd = &AddeOp1;
+ else
+ HiAdd = &AddeOp0;
+
+
+ if (AddcOp0->getOpcode() == Opc) {
+ LoMul = &AddcOp0;
+ LowAdd = &AddcOp1;
+ }
+ if (AddcOp1->getOpcode() == Opc) {
+ LoMul = &AddcOp1;
+ LowAdd = &AddcOp0;
+ }
+
+ if (LoMul == NULL)
+ return SDValue();
+
+ if (LoMul->getNode() != HiMul->getNode())
+ return SDValue();
+
+ // Create the merged node.
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(LoMul->getOperand(0));
+ Ops.push_back(LoMul->getOperand(1));
+ Ops.push_back(*LowAdd);
+ Ops.push_back(*HiAdd);
+
+ SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+ DAG.getVTList(MVT::i32, MVT::i32),
+ &Ops[0], Ops.size());
+
+  // Replace the ADD nodes' uses with the MLAL node's values.
+ SDValue HiMLALResult(MLALNode.getNode(), 1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ SDValue LoMLALResult(MLALNode.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
+/// PerformADDCCombine - Target-specific dag combine transform from
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+static SDValue PerformADDCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+
+}
+
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
@@ -7153,7 +7493,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
- if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ if (N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
@@ -7185,7 +7525,7 @@ static SDValue PerformSUBCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ if (N1.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
if (Result.getNode()) return Result;
}
@@ -7313,41 +7653,6 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
-static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
- return N.getOpcode() == ARMISD::CMOV && N.getNode()->hasOneUse() &&
- isZeroOrAllOnes(N.getOperand(0), AllOnes);
-}
-
-/// formConditionalOp - Combine an operation with a conditional move operand
-/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
-/// (and x, (cmov -1, y, cond)) => (and.cond, x, y)
-static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
- bool Commutable) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- bool isAND = N->getOpcode() == ISD::AND;
- bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
- if (!isCand && Commutable) {
- isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
- if (isCand)
- std::swap(N0, N1);
- }
- if (!isCand)
- return SDValue();
-
- unsigned Opc = 0;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ISD::AND: Opc = ARMISD::CAND; break;
- case ISD::OR: Opc = ARMISD::COR; break;
- case ISD::XOR: Opc = ARMISD::CXOR; break;
- }
- return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
- N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
- N1.getOperand(4));
-}
-
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -7382,10 +7687,10 @@ static SDValue PerformANDCombine(SDNode *N,
}
if (!Subtarget->isThumb1Only()) {
- // (and x, (cmov -1, y, cond)) => (and.cond x, y)
- SDValue CAND = formConditionalOp(N, DAG, true);
- if (CAND.getNode())
- return CAND;
+ // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
@@ -7425,13 +7730,12 @@ static SDValue PerformORCombine(SDNode *N,
}
if (!Subtarget->isThumb1Only()) {
- // (or x, (cmov 0, y, cond)) => (or.cond x, y)
- SDValue COR = formConditionalOp(N, DAG, true);
- if (COR.getNode())
- return COR;
+ // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
}
-
// The code below optimizes (or (and X, Y), Z).
// The AND operand needs to have a single user to make these optimizations
// profitable.
@@ -7593,10 +7897,10 @@ static SDValue PerformXORCombine(SDNode *N,
return SDValue();
if (!Subtarget->isThumb1Only()) {
- // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
- SDValue CXOR = formConditionalOp(N, DAG, true);
- if (CXOR.getNode())
- return CXOR;
+ // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
@@ -8746,6 +9050,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
@@ -8807,8 +9112,8 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
}
bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
- if (!Subtarget->allowsUnalignedMem())
- return false;
+  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
+ bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
switch (VT.getSimpleVT().SimpleTy) {
default:
@@ -8816,10 +9121,14 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- return true;
+    // Unaligned access can use (for example) LDRB, LDRH, LDR
+ return AllowsUnaligned;
case MVT::f64:
- return Subtarget->hasNEON();
- // FIXME: VLD1 etc with standard alignment is legal.
+ case MVT::v2f64:
+ // For any little-endian targets with neon, we can support unaligned ld/st
+ // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
+    // A big-endian target may also explicitly support unaligned accesses
+ return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
}
}
@@ -8838,7 +9147,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
// See if we can use NEON instructions for this...
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
return MVT::v4i32;
@@ -9632,7 +9941,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -9657,7 +9966,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 51d1205..4eb3b2c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -63,9 +63,6 @@ namespace llvm {
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
- CAND, // ARM conditional and instructions.
- COR, // ARM conditional or instructions.
- CXOR, // ARM conditional xor instructions.
BCC_i64,
@@ -176,6 +173,9 @@ namespace llvm {
VMULLs, // ...signed
VMULLu, // ...unsigned
+ UMLAL, // 64bit Unsigned Accumulate Multiply
+ SMLAL, // 64bit Signed Accumulate Multiply
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -260,6 +260,11 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
+ virtual bool isSelectSupported(SelectSupportKind Kind) const {
+ // ARM does not support scalar condition selects on vectors.
+ return (Kind != ScalarCondVectorVal);
+ }
+
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
virtual EVT getSetCCResultType(EVT VT) const;
@@ -461,7 +466,11 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable = false)
const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
@@ -472,7 +481,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
- virtual void HandleByVal(CCState *, unsigned &) const;
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index c8966fb..67a6820 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
let Inst{3-0} = Rm;
}
+// Division instructions.
+class ADivA1I<bits<3> opcod, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rm;
+ let Inst{7-4} = 0b0001;
+ let Inst{3-0} = Rn;
+}
+
// PKH instructions
def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
@@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV5TE];
}
+// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps.
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps];
+}
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 31b0c41..a0b6f24 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -13,13 +13,17 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+
+namespace {
+ /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for ARM ELF.
+ struct ARMCGBR : public MachineFunctionPass {
+ static char ID;
+ ARMCGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->getGlobalBaseReg() == 0)
+ return false;
+
+ const ARMTargetMachine *TM =
+ static_cast<const ARMTargetMachine *>(&MF.getTarget());
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ LLVMContext* Context = &MF.getFunction()->getContext();
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0,
+ "_GLOBAL_OFFSET_TABLE_");
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id);
+ unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType());
+ unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
+
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
+ ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
+ TII.get(Opc), GlobalBaseReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char ARMCGBR::ID = 0;
+FunctionPass*
+llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 992aba5..df2e55e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisInt<0>,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
+
+def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
+def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
+def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
@@ -90,9 +97,10 @@ def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPOptInGlue, SDNPOutGlue]>;
def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
SDT_ARMStructByVal,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
@@ -148,14 +156,16 @@ def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
- SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+ SDT_ARMEH_SJLJ_Setjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
- SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+ SDT_ARMEH_SJLJ_Longjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
@@ -197,6 +207,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+ AssemblerPredicate<"FeatureHWDivARM">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -232,6 +244,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
@@ -242,6 +255,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
+// VGETLNi32 is microcoded on Swift - prefer VMOV.
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+
+// VDUP.32 is microcoded on Swift - prefer VMOV.
+def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+
+// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
+// this allows more effective execution domain optimization. See
+// setExecutionDomain().
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+
def IsLE : Predicate<"TLI.isLittleEndian()">;
def IsBE : Predicate<"TLI.isBigEndian()">;
@@ -256,15 +283,13 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// imm_neg_XFORM - Return a imm value packed into the format described for
-// imm_neg defs below.
+// imm_neg_XFORM - Return the negation of an i32 immediate value.
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
-// so_imm_not_XFORM - Return a so_imm value packed into the format described for
-// so_imm_not def below.
-def so_imm_not_XFORM : SDNodeXForm<imm, [{
+// imm_not_XFORM - Return the complement of an i32 immediate value.
+def imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -275,7 +300,7 @@ def imm16_31 : ImmLeaf<i32, [{
def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- int64_t Value = -(int)N->getZExtValue();
+ unsigned Value = -(unsigned)N->getZExtValue();
return Value && ARM_AM::getSOImmVal(Value) != -1;
}], imm_neg_XFORM> {
let ParserMatchClass = so_imm_neg_asmoperand;
@@ -287,7 +312,7 @@ def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_not_XFORM> {
+ }], imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
}
@@ -1791,12 +1816,15 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
let Inst{15-12} = Rd;
let Inst{11-0} = label{11-0};
}
+
+let hasSideEffects = 1 in {
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>;
def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi, []>;
+}
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -3079,15 +3107,19 @@ def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
(SUBSri GPR:$src, so_imm_neg:$imm)>;
def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
- (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
- (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
(SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
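As a worked check of the new ARMadde pattern above (the constant is a made-up example): for r0 + (-1000) + carry, the complement ~(-1000) is 999, which MOVi16 can materialise, and SBC computes Rn - Rm - (1 - C) = r0 - 999 - 1 + C = r0 - 1000 + C, which is exactly the add-with-carry of -1000. The selection is therefore a movw of 999 into a scratch register followed by an sbc against it.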
@@ -3399,6 +3431,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{11-8} = Rm;
let Inst{3-0} = Rn;
}
+class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
// FIXME: The v5 pseudos are only necessary for the additional Constraint
// property. Remove them when it's possible to add those properties
@@ -3419,13 +3463,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6, UseMulOps]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6, UseMulOps]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3441,7 +3485,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3481,14 +3525,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
}
// Multiply + accumulate
-def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
-def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
@@ -3504,17 +3548,22 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
let Inst{3-0} = Rn;
}
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in {
def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
- (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
- (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p),
4, IIC_iMAC64, [],
@@ -3542,7 +3591,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3552,7 +3601,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3606,7 +3655,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd, (add GPR:$Ra,
(opnode (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3614,7 +3663,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3622,7 +3671,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3630,7 +3679,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3638,7 +3687,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3646,7 +3695,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
@@ -3749,6 +3798,19 @@ defm SMUA : AI_sdml<0, "smua">;
defm SMUS : AI_sdml<1, "smus">;
//===----------------------------------------------------------------------===//
+// Division Instructions (ARMv7-A with virtualization extension)
+//
+def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
@@ -3986,48 +4048,6 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
-// Conditional instructions
-multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
- Instruction irsr,
- InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis> {
- def ri : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm,
- pred:$p, cc_out:$s),
- 4, iii, [],
- (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def rr : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm,
- pred:$p, cc_out:$s),
- 4, iir, [],
- (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def rsi : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift,
- pred:$p, cc_out:$s),
- 4, iis, [],
- (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift,
- pred:$p, cc_out:$s),
- 4, iis, [],
- (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
-}
-
-defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-
} // neverHasSideEffects
@@ -4723,21 +4743,13 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
Requires<[IsARM, IsIOS]>;
}
-// eh.sjlj.dispatchsetup pseudo-instructions.
-// These pseudos are used for both ARM and Thumb2. Any differences are
-// handled when the pseudo is expanded (which happens before any passes
-// that need the instruction size).
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
- isBarrier = 1 in
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for both ARM and Thumb. Any differences are handled when
+// the pseudo is expanded (which happens before any passes that need the
+// instruction size).
+let isBarrier = 1 in
def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- isBarrier = 1 in
-def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>;
-
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -4841,32 +4853,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
(SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
(i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 048d340..3cf213c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -398,6 +398,20 @@ def VecListFourQWordIndexed : Operand<i32> {
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
+def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 8;
+}]>;
+def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 4;
+}]>;
+def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 4;
+}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
@@ -1980,7 +1994,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{5};
+ let Inst{4} = Rn{4};
}
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
@@ -2023,7 +2037,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{5};
+ let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
extractelt, addrmode6oneL32> {
@@ -2273,6 +2287,25 @@ def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
+// load / store if it's legal.
+def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
+ (VLD1q64 addrmode6:$addr)>;
+def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q64 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
+ (VLD1q32 addrmode6:$addr)>;
+def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q32 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+
//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//
@@ -4455,10 +4488,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
+ (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
+ (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
+ (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
@@ -4467,9 +4526,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
[(set QPR:$Vd,
(v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
+ (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
+ (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
+ (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -4983,7 +5068,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
(outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
- imm:$lane))]> {
+ imm:$lane))]>,
+ Requires<[HasNEON, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -5006,7 +5092,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
- (SubReg_i32_lane imm:$lane))>;
+ (SubReg_i32_lane imm:$lane))>,
+ Requires<[HasNEON, HasFastVGETLNi32]>;
+def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
@@ -5117,14 +5212,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
-def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
+ Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+// NEONvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+ Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
@@ -5561,6 +5665,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
+// Prefer VMOVDRR for i32 -> f32 bitcasts; it can write all DPR registers.
+def : Pat<(f32 (bitconvert GPR:$a)),
+ (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
+ Requires<[HasNEON, DontUseVMOVSR]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
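
The VBSL patterns added above map the generic (or (and Vn, Vd), (and Vm, (vnot Vd))) idiom onto vbsl, which selects each result bit from Vn where the corresponding bit of Vd (the mask) is set and from Vm otherwise. A minimal standalone C++ sketch of that bitwise-select identity, for illustration only and not part of this patch:

    #include <cassert>
    #include <cstdint>

    // Scalar model of NEON vbsl: for each bit, pick from 'a' where the mask
    // bit is 1, otherwise pick from 'b'.
    static uint32_t bitwise_select(uint32_t mask, uint32_t a, uint32_t b) {
      return (a & mask) | (b & ~mask);
    }

    int main() {
      // 0xF0F0F0F0 takes the high nibble of each byte from 'a' and the low
      // nibble from 'b'.
      assert(bitwise_select(0xF0F0F0F0u, 0xAAAAAAAAu, 0x55555555u) == 0xA5A5A5A5u);
      return 0;
    }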
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 554f6d9..ae7a5c0 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -223,6 +223,7 @@ def t_addrmode_sp : Operand<i32>,
def t_addrmode_pc : Operand<i32> {
let EncoderMethod = "getAddrModePCOpValue";
let DecoderMethod = "DecodeThumbAddrModePC";
+ let PrintMethod = "printThumbLdrLabelOperand";
}
//===----------------------------------------------------------------------===//
@@ -1200,6 +1201,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>;
+let hasSideEffects = 1 in
def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
2, IIC_iALUi, []>;
@@ -1245,10 +1247,6 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
Requires<[IsThumb, IsIOS]>;
-let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- isBarrier = 1 in
-def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
-
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 8ecf009..002d64a 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -159,7 +159,7 @@ def t2addrmode_imm12 : Operand<i32>,
// t2ldrlabel := imm12
def t2ldrlabel : Operand<i32> {
let EncoderMethod = "getAddrModeImm12OpValue";
- let PrintMethod = "printT2LdrLabelOperand";
+ let PrintMethod = "printThumbLdrLabelOperand";
}
def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
@@ -523,6 +523,23 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
let Inst{7-4} = opc7_4;
let Inst{3-0} = Rm;
}
+class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
@@ -757,33 +774,6 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{24} = 1;
let Inst{23-21} = op23_21;
}
-
- // Predicated versions.
- def CCri : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm,
- pred:$p, cc_out:$s), 4, IIC_iALUi, [],
- (!cast<Instruction>(NAME#ri) GPRnopc:$Rd,
- GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm,
- pred:$p),
- 4, IIC_iALUi, [],
- (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd,
- GPR:$Rn, imm0_4095:$imm, pred:$p)>,
- RegConstraint<"$Rfalse = $Rd">;
- def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm,
- pred:$p, cc_out:$s), 4, IIC_iALUr, [],
- (!cast<Instruction>(NAME#rr) GPRnopc:$Rd,
- GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm,
- pred:$p, cc_out:$s), 4, IIC_iALUsi, [],
- (!cast<Instruction>(NAME#rs) GPRnopc:$Rd,
- GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
@@ -1200,6 +1190,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>;
+let hasSideEffects = 1 in
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi,
@@ -1962,7 +1953,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
- (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
// Select Bytes -- for disassembly only
@@ -2405,7 +2396,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2MLA: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2415,7 +2407,8 @@ def t2MLA: T2FourReg<
def t2MLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2437,15 +2430,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000,
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2MulLong<0b100, 0b0000,
+def t2SMLAL : T2MlaLong<0b100, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
- "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
-def t2UMLAL : T2MulLong<0b110, 0b0000,
+def t2UMLAL : T2MlaLong<0b110, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
- "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
@@ -2482,7 +2477,7 @@ def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2503,7 +2498,7 @@ def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2608,7 +2603,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2621,7 +2616,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2634,7 +2629,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2647,7 +2642,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2660,7 +2655,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2673,7 +2668,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2767,7 +2762,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
// Division Instructions.
// Signed and unsigned division on v7-M
//
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
@@ -2778,7 +2773,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
@@ -3049,37 +3044,6 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
RegConstraint<"$false = $Rd">;
} // isCodeGenOnly = 1
-multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
- // shifted imm
- def ri : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm,
- pred:$p, cc_out:$s),
- 4, iii, [],
- (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- // register
- def rr : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm,
- pred:$p, cc_out:$s),
- 4, iir, [],
- (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- // shifted register
- def rs : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm,
- pred:$p, cc_out:$s),
- 4, iis, [],
- (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
-} // T2I_bincc_irs
-
-defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
- IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
- IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
- IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -3281,11 +3245,11 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{15-14} = 0b10;
let Inst{12} = 1;
- bits<20> target;
+ bits<24> target;
let Inst{26} = target{19};
let Inst{11} = target{18};
let Inst{13} = target{17};
- let Inst{21-16} = target{16-11};
+ let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
}
@@ -3367,20 +3331,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
Requires<[IsThumb2, IsIOS]>;
}
-let isCall = 1, Defs = [LR], Uses = [SP] in {
- // mov lr, pc; b if callee is marked noreturn to avoid confusing the
- // return stack predictor.
- def t2BMOVPCB_CALL : tPseudoInst<(outs),
- (ins t_bltarget:$func),
- 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
- Requires<[IsThumb]>;
-}
-
-// Direct calls
-def : T2Pat<(ARMcall_nolink texternalsym:$func),
- (t2BMOVPCB_CALL texternalsym:$func)>,
- Requires<[IsThumb]>;
-
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index eb7eaa6..b5a896c 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -450,11 +450,11 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : ARMPat<(f32_to_f16 SPR:$a),
- (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+def : Pat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def : ARMPat<(f16_to_f32 GPR:$a),
- (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+def : Pat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
@@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
let D = VFPNeonDomain;
}
+// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
- [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
+ Requires<[HasVFP2, UseVMOVSR]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 3f99cce..254d8f6 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -168,7 +168,7 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
intptr_t LazyPtr = getIndirectSymAddr(Fn);
if (!LazyPtr) {
// In PIC mode, the function stub is loading a lazy-ptr.
- LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE);
DEBUG(if (F)
errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
<< "] for GV '" << F->getName() << "'\n";
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 897ceb6..0185289 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -27,7 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -1448,7 +1448,7 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
- const TargetData *TD;
+ const DataLayout *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
@@ -1478,7 +1478,7 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getTargetData();
+ TD = Fn.getTarget().getDataLayout();
TII = Fn.getTarget().getInstrInfo();
TRI = Fn.getTarget().getRegisterInfo();
STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index f1c8fc8..c0ac04b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -119,7 +124,7 @@ public:
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -130,7 +135,7 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -249,6 +254,9 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
assert(0 && "Duplicate entries!");
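
The GlobalBaseReg field and accessors added above give later passes a single cached PIC base virtual register per function. A hypothetical sketch of how a pass such as the new ARMGlobalBaseReg pass might use them, modeled on the analogous X86 idiom; the helper name below is invented and the actual pass implementation is not part of this hunk:

    #include "ARMMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    // (plus the ARM target headers that declare ARM::rGPRRegClass)
    using namespace llvm;

    // Hypothetical helper: return the per-function global base register,
    // creating a fresh virtual register the first time it is requested.
    static unsigned getOrCreateGlobalBaseReg(MachineFunction &MF) {
      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      if (unsigned Reg = AFI->getGlobalBaseReg())
        return Reg;                                  // already initialized
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
      AFI->setGlobalBaseReg(Reg);                    // cache for later queries
      return Reg;
    }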
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 6f974fd..b0f576b 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -49,6 +49,9 @@ def ssub_0 : SubRegIndex;
def ssub_1 : SubRegIndex;
def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+
+def gsub_0 : SubRegIndex;
+def gsub_1 : SubRegIndex;
// Let TableGen synthesize the remaining 12 ssub_* indices.
// We don't need to name them.
}
@@ -247,11 +250,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// Scalar single precision floating point register class..
-def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
+// avoid partial-write dependencies on D registers (S registers are
+// renamed as portions of D registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
+ (sequence "S%u", 0, 31), 2),
+ (sequence "S%u", 0, 31))>;
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// Scalar double precision floating point / generic 64-bit vector register
// class.
@@ -308,6 +316,17 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
let AltOrderSelect = [{ return 1; }];
}
+// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.
+// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs.
+def Tuples2R : RegisterTuples<[gsub_0, gsub_1],
+ [(add R0, R2, R4, R6, R8, R10, R12),
+ (add R1, R3, R5, R7, R9, R11, SP)]>;
+
+// Register class representing a pair of even-odd GPRs.
+def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> {
+ let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
+}
+
// Pseudo-registers representing 3 consecutive D registers.
def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
[(shl DPR, 0),
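
The decimate/add combination in the SPR allocation order above lists the even-numbered S registers first and then the full sequence, with already-listed registers skipped, i.e. S0, S2, ..., S30, S1, S3, ..., S31. A small standalone C++ sketch that reproduces the intended order, for illustration only:

    #include <iostream>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> order;
      std::set<int> seen;
      // decimate((sequence "S%u", 0, 31), 2): every other register, S0, S2, ...
      for (int i = 0; i <= 31; i += 2)
        if (seen.insert(i).second) order.push_back(i);
      // ...followed by the full sequence; registers already listed are skipped.
      for (int i = 0; i <= 31; ++i)
        if (seen.insert(i).second) order.push_back(i);

      for (int r : order) std::cout << 'S' << r << ' ';
      std::cout << '\n';  // S0 S2 S4 ... S30 S1 S3 ... S31
    }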
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 81d2fa3..02196d0 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass;
def IIC_iMAC32 : InstrItinClass;
def IIC_iMUL64 : InstrItinClass;
def IIC_iMAC64 : InstrItinClass;
+def IIC_iDIV : InstrItinClass;
def IIC_iLoad_i : InstrItinClass;
def IIC_iLoad_r : InstrItinClass;
def IIC_iLoad_si : InstrItinClass;
@@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass;
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 7bc590f..404634f 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1876,8 +1876,9 @@ def CortexA9Itineraries : ProcessorItineraries<
]>;
// ===---------------------------------------------------------------------===//
-// This following definitions describe the simple machine model which
-// will replace itineraries.
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler and will eventually replace itineraries.
+
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
@@ -1891,5 +1892,595 @@ def CortexA9Model : SchedMachineModel {
let Itineraries = CortexA9Itineraries;
}
-// TODO: Add Cortex-A9 processor and scheduler resources.
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+def A9UnitALU : ProcResource<2>;
+def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
+def A9UnitAGU : ProcResource<1>;
+def A9UnitLS : ProcResource<1>;
+def A9UnitFP : ProcResource<1> { let Buffered = 0; }
+def A9UnitB : ProcResource<1>;
+
+//===----------------------------------------------------------------------===//
+// Define scheduler read/write types with their resources and latency on A9.
+
+// Consume an issue slot, but no processor resources. This is useful when all
+// other writes associated with the operand have NumMicroOps = 0.
+def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
+
+// Write an integer register.
+def A9WriteI : SchedWriteRes<[A9UnitALU]>;
+// Write an integer shifted-by register
+def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+
+// Basic ALU.
+def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+// ALU with operand shifted by immediate.
+def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+// ALU with operand shifted by register.
+def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+
+// Multiplication
+def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
+def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
+def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+// Floating-point
+// Only one FP or AGU instruction may issue per cycle. We model this
+// by having FP instructions consume the AGU resource.
+def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
+def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
+def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
+def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
+
+// NEON has an odd mix of latencies. Simply name the write types by latency.
+def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
+def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
+def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
+def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+
+// Reserve A9UnitFP for 2 consecutive cycles.
+def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 7;
+ let ResourceCycles = [2];
+}
+def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+// Branches don't have a def operand but still consume resources.
+def A9WriteB : SchedWriteRes<[A9UnitB]>;
+
+// Address generation.
+def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
+
+// Load Integer.
+def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+// Load the upper 32-bits using the same micro-op.
+def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
+ let NumMicroOps = 0; }
+// Offset shifted by register.
+def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+// Load (and zero extend) a byte.
+def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
+
+// Load or Store Float, aligned.
+def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
+
+// Store Integer.
+def A9WriteS : SchedWriteRes<[A9UnitLS]>;
+
+//===----------------------------------------------------------------------===//
+// Define resources dynamically for load multiple variants.
+
+// Define helpers for extra latency without consuming resources.
+def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
+foreach NumCycles = 2-8 in {
+def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
+} // foreach NumCycles
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+// Define address generation sequences and predicates for 8 flavors of LDMs.
+foreach NumAddr = 1-8 in {
+
+// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
+// latency for instructions that generate multiple loads or stores.
+def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
+
+// Define a predicate to select the LDM based on number of memory addresses.
+def A9LMAdr#NumAddr#Pred :
+ SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+
+} // foreach NumAddr
+
+// Fall-back for unknown LDMs.
+def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
+
+// LDM/VLDM/VLDn address generation latency & resources.
+// Dynamically select the A9WriteAdrN sequence using a predicate.
+def A9WriteLMAdr : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
+ // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
+ SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
+
+// Define LDM Resources.
+// These take no issue resource, so they can be combined with other
+// writes like WriteB.
+// A9WriteLMLo takes a single LS resource and 2 cycles.
+def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+foreach NumAddr = 1-8 in {
+def A9WriteL#NumAddr : WriteSequence<
+ [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+def A9WriteL#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// LDM: Load multiple into 32-bit integer registers.
+
+// A9WriteLM variants expand into a pair of writes for each 64-bit
+// value loaded. When the number of registers is odd, the last
+// A9WriteLnHi is naturally ignored because the instruction has no
+// following def operands. These variants take no issue resource, so
+// they may need to be part of a WriteSequence that includes A9WriteIssue.
+def A9WriteLM : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3Hi, A9WriteL3Hi,
+ A9WriteL4Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL5Hi,
+ A9WriteL6Hi, A9WriteL6Hi,
+ A9WriteL7Hi, A9WriteL7Hi,
+ A9WriteL8Hi, A9WriteL8Hi]>]> {
+ let Variadic = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
+
+// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
+// so it can be used in WriteSequences for single-issue instructions that
+// encapsulate multiple loads.
+def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+
+foreach NumAddr = 1-8 in {
+
+// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
+def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
+
+// A9WriteLfp1-8 definitions are statically expanded into a sequence of
+// A9WriteLfpOps with additive latency that takes a single issue slot.
+// Used directly to describe NEON VLDn.
+def A9WriteLfp#NumAddr : WriteSequence<
+ [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
+// permuting loaded values.
+def A9WriteLfp#NumAddr#Mov : WriteSequence<
+ [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+} // foreach NumAddr
+
+// Define VLDM/VSTM PreRA resources.
+// A9WriteLMfpPreRA are dynamically expanded into the correct
+// A9WriteLfp1-8 sequence based on a predicate. This supports the
+// preRA VLDM variants in which all 64-bit loads are written to the
+// same tuple of either single or double precision registers.
+def A9WriteLMfpPreRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
+ // For unknown VLDM/VSTM PreRA, assume 2xS registers.
+ SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
+
+// Define VLDM/VSTM PostRA Resources.
+// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
+def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
+
+foreach NumAddr = 1-8 in {
+
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+def A9WriteLMfp#NumAddr : WriteSequence<
+ [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMfp#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+} // foreach NumAddr
+
+// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
+// pair of writes for each 64-bit data loaded. When the number of
+// registers is odd, the last WriteLMfpnHi is naturally ignored because
+// the instruction has no following def operands.
+def A9WriteLMfpPostRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi,
+ A9WriteLMfp7, A9WriteLMfp7Hi]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi,
+ A9WriteLMfp7, A9WriteLMfp7Hi,
+ A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi,
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi,
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi,
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
+ let Variadic = 1;
+}
+
+// Distinguish between our multiple MI-level forms of the same
+// VLDM/VSTM instructions.
+def A9PreRA : SchedPredicate<
+ "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
+def A9PostRA : SchedPredicate<
+ "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
+
+// VLDM represents all destination registers as a single register
+// tuple, unlike LDM. So the number of write operands is not variadic.
+def A9WriteLMfp : SchedWriteVariant<[
+ SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
+ SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
+
+//===----------------------------------------------------------------------===//
+// Resources for other (non LDM/VLDM) Variants.
+
+// These mov immediate writers are unconditionally expanded with
+// additive latency.
+def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
+
+// Some ALU operations can read loaded integer values one cycle early.
+def A9ReadA : SchedReadAdvance<1,
+ [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
+ A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
+ A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
+ A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
+
+// Read types for operands that are unconditionally read in cycle N
+// after the instruction issues; these decrease producer latency by N-1.
+def A9Read2 : SchedReadAdvance<1>;
+def A9Read3 : SchedReadAdvance<2>;
+def A9Read4 : SchedReadAdvance<3>;
+
+//===----------------------------------------------------------------------===//
+// Map itinerary classes to scheduler read/write resources per operand.
+//
+// For ARM, we piggyback scheduler resources on the Itinerary classes
+// to avoid perturbing the existing instruction definitions.
+
+// This table follows the ARM Cortex-A9 Technical Reference Manuals,
+// mostly in order.
+let SchedModel = CortexA9Model in {
+
+def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+ IIC_iMVNi,IIC_iMVNsi,
+ IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
+def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
+
+def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
+def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
+def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
+
+def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
+def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+
+// A9WriteHi ignored for MUL32.
+def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
+ IIC_iMUL64,IIC_iMAC64]>;
+// FIXME: SMLALxx needs itin classes
+def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
+
+// TODO: For floating-point ops, we model the pipeline forwarding
+// latencies here. WAW latencies are sometimes longer.
+
+def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
+ IIC_fpUNA32, IIC_fpUNA64,
+ IIC_fpCMP32, IIC_fpCMP64]>;
+def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
+def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
+ IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
+ IIC_fpALU32, IIC_fpALU64]>;
+def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
+def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
+def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
+def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
+def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
+def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
+def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
+def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
+
+def :ItinRW<[A9WriteB], [IIC_Br]>;
+
+// A9 PLD is processed in a dedicated unit.
+def :ItinRW<[], [IIC_Preload]>;
+
+// Note: We must assume that loads are aligned, since the machine
+// model cannot know this statically and A9 ignores alignment hints.
+
+// A9WriteAdr consumes the AGU regardless of address writeback. But its
+// latency is only relevant for users of an updated address.
+def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
+ IIC_iLoad_iu,IIC_iLoad_ru]>;
+def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
+def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
+ IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
+def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
+def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
+ IIC_iLoad_d_ru]>;
+// Store either has no def operands, or the one def for address writeback.
+def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
+ IIC_iStore_iu, IIC_iStore_ru,
+ IIC_iStore_d_i, IIC_iStore_d_r,
+ IIC_iStore_d_ru]>;
+def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
+ IIC_iStore_bh_i, IIC_iStore_bh_r,
+ IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
+def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
+
+// A9WriteLM will be expanded into a separate write for each def
+// operand. Address generation consumes resources, but A9WriteLMAdr
+// is listed after all def operands, so has no effective latency.
+//
+// Note: A9WriteLM expands into an even number of def operands. The
+// actual number of def operands may be less by one.
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
+
+// Load multiple with address writeback has an extra def operand in
+// front of the loaded registers.
+//
+// Reuse the load-multiple variants for store-multiple because the
+// resources are identical. For stores, only the address writeback
+// has a def operand so the WriteL latencies are unused.
+def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
+ IIC_iStore_m,
+ IIC_iStore_mu]>;
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+
+def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
+
+def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
+def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
+def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
+ IIC_fpStore_m, IIC_fpStore_mu]>;
+
+// Note: Unlike VLDM, VLD1 expects the writeback operand after the
+// normal writes.
+def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
+ IIC_VLD1x2, IIC_VLD1x2u]>;
+def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
+ IIC_VLD1x4, IIC_VLD1x4u,
+ IIC_VLD4dup, IIC_VLD4dupu]>;
+def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
+ IIC_VLD2, IIC_VLD2u,
+ IIC_VLD2dup, IIC_VLD2dupu]>;
+def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
+ IIC_VLD2x2, IIC_VLD2x2u,
+ IIC_VLD2ln, IIC_VLD2lnu]>;
+def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
+ IIC_VLD3dup, IIC_VLD3dupu]>;
+def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
+ IIC_VLD4ln, IIC_VLD4lnu]>;
+def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
+
+// Vector stores use similar resources to vector loads, so use the
+// same write types. The address write must be first for stores with
+// address writeback.
+def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
+ IIC_VST1x2, IIC_VST1x2u,
+ IIC_VST1ln, IIC_VST1lnu,
+ IIC_VST2, IIC_VST2u,
+ IIC_VST2x2, IIC_VST2x2u,
+ IIC_VST2ln, IIC_VST2lnu]>;
+def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
+ IIC_VST1x4, IIC_VST1x4u,
+ IIC_VST3, IIC_VST3u,
+ IIC_VST3ln, IIC_VST3lnu,
+ IIC_VST4, IIC_VST4u,
+ IIC_VST4ln, IIC_VST4lnu]>;
+
+// NEON moves.
+def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
+def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
+def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
+
+// NEON integer arithmetic
+//
+// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
+// VSUB/VMVN/VCLSD/VCLZD/VCNTD
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
+// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
+// ...
+// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
+// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
+// VQNEG/VQABS
+def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
+// VABS
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
+// VPADD/VPADDL are mapped later under IIC_SHLi.
+// ...
+// VCLSQ/VCLZQ/VCNTQ take two cycles.
+def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
+// VMOVimm/VMVNimm/VORRimm/VBICimm
+def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
+def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
+
+// NEON integer multiply
+//
+// Note: these don't quite match the timing docs, but they do match
+// the original A9 itinerary.
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
+def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
+def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
+def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
+def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
+
+// NEON integer shift
+// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
+def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
+def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
+
+// NEON permute
+def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
+ [IIC_VPERMQ3, IIC_VEXTQ]>;
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
+ [IIC_VTBX4]>;
+// NEON floating-point
+def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
+def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
+def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
+def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
+def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
+def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
+def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 0000000..e9bc3e0
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1085 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
+
+def SwiftItineraries : ProcessorItineraries<
+ [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_LS]>],
+ [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 3>,
+ InstrStage<1, [SW_ALU0]>],
+ [5, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 6, 1, 1]>,
+ //
+ // Integer divide
+ InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 0>,
+ InstrStage<14, [SW_IDIV]>],
+ [14, 1, 1]>,
+
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [4, 1]>,
+
+ // Integer store pipeline
+  //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [2], [], -1>, // dynamic uops
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ //
+ // Most floating-point moves get issued on ALU0.
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 4>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+  // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Count
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Integer to Lane Move
+ // FIXME: I think this is correct, but it is not clear from the tuning guide.
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Double-register FP Unary
+  // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Quad-register FP Unary
+  // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP Binary
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+  // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Quad-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Double-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Quad-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute (3 cycle issue on A9)
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simple machine model that will
+// replace the itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 3;
+
+ let Itineraries = SwiftItineraries;
+}
+
+// TODO: Add Swift processor and scheduler resources.
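A minimal sketch of what that TODO could look like, assuming the generic ProcessorModel helper in Target.td; the CPU-to-model binding is standard TableGen syntax, but the feature list below is purely illustrative and not the actual Swift feature set.

// Illustrative only -- not part of this patch. An ARM.td entry would bind a
// CPU name to the machine model roughly like this:
def : ProcessorModel<"swift", SwiftModel,
                     [HasV7Ops, FeatureNEON, FeatureDB,
                      FeatureVFP4]>;  // hypothetical feature list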
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 31d5d38..b33b3c9 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -155,7 +155,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
TargetLowering::ArgListEntry Entry;
// First argument: data pointer
- Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 4762854..bcc9db4 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,8 +13,9 @@
#include "ARMSubtarget.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -31,6 +32,10 @@ static cl::opt<bool>
DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
@@ -49,6 +54,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
+ , UseMulOps(UseFusedMulOps)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
@@ -63,6 +69,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasFP16(false)
, HasD16(false)
, HasHardwareDivide(false)
+ , HasHardwareDivideInARM(false)
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index b394061..8e6b650 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -30,7 +30,7 @@ class StringRef;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA8, CortexA9
+ Others, CortexA8, CortexA9, CortexA15, Swift
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -57,6 +57,10 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// UseMulOps - True if non-microcoded fused integer multiply-add and
+ /// multiply-subtract instructions should be used.
+ bool UseMulOps;
+
/// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
bool SlowFPVMLx;
@@ -107,6 +111,9 @@ protected:
/// HasHardwareDivide - True if subtarget supports [su]div
bool HasHardwareDivide;
+ /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
+ bool HasHardwareDivideInARM;
+
/// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
/// instructions.
bool HasT2ExtractPack;
@@ -199,7 +206,10 @@ protected:
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexA15() const { return ARMProcFamily == CortexA15; }
+ bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
+ bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
bool hasARMOps() const { return !NoARM; }
@@ -211,8 +221,10 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
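A hedged usage sketch of the new subtarget queries (hypothetical callers, not code from this patch); the real consumers are the lowering and pass-configuration code shown elsewhere in this diff.

#include "ARMSubtarget.h"

// Illustration only: how codegen code typically consults the new hooks.
static bool useHardwareDiv(const ARMSubtarget &ST, bool InARMMode) {
  // Thumb2 [su]div is gated by hasDivide(); ARM-mode [su]div additionally
  // needs the new hasDivideInARMMode() query.
  return InARMMode ? ST.hasDivideInARMMode() : ST.hasDivide();
}

static bool formFusedMulOps(const ARMSubtarget &ST) {
  // MLA/MLS formation is now gated on useMulOps() rather than per-CPU checks,
  // while the MLx expansion pass runs on any A9-like core (isLikeA9()).
  return ST.useMulOps();
}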
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 171c9ad..b486d4f 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -60,7 +60,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget),
- DataLayout(Subtarget.isAPCS_ABI() ?
+ DL(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
Subtarget.isAAPCS_ABI() ?
@@ -68,10 +68,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"v128:64:128-v64:64:64-n32-S64") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"v128:64:128-v64:64:64-n32-S32")),
- ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget) {
+ FrameLowering(Subtarget),
+ STTI(&TLInfo), VTTI(&TLInfo) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -88,7 +88,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
- DataLayout(Subtarget.isAPCS_ABI() ?
+ DL(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:32:128-v64:32:64-a:0:32-n32-S32") :
@@ -99,12 +99,12 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:64:128-v64:64:64-a:0:32-n32-S32")),
- ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
+ STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
@@ -143,6 +143,11 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ TM->Options.EnableFastISel)
+ addPass(createARMGlobalBaseRegPass());
return false;
}
@@ -150,7 +155,7 @@ bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
return true;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index abcdb24..ebdd5b4 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -15,7 +15,6 @@
#define ARMTARGETMACHINE_H
#include "ARMInstrInfo.h"
-#include "ARMELFWriterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMJITInfo.h"
#include "ARMSubtarget.h"
@@ -25,7 +24,8 @@
#include "Thumb1FrameLowering.h"
#include "Thumb2InstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
@@ -62,11 +62,12 @@ public:
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
ARMInstrInfo InstrInfo;
- const TargetData DataLayout; // Calculates type size & alignment
- ARMELFWriterInfo ELFWriterInfo;
+ const DataLayout DL; // Calculates type size & alignment
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -88,12 +89,14 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
-
- virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual const ARMELFWriterInfo *getELFWriterInfo() const {
- return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
}
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
};
/// ThumbTargetMachine - Thumb target machine.
@@ -104,12 +107,13 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
OwningPtr<ARMBaseInstrInfo> InstrInfo;
- const TargetData DataLayout; // Calculates type size & alignment
- ARMELFWriterInfo ELFWriterInfo;
+ const DataLayout DL; // Calculates type size & alignment
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -138,10 +142,13 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual const ARMELFWriterInfo *getELFWriterInfo() const {
- return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
}
+ virtual const DataLayout *getDataLayout() const { return &DL; }
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 3a5957b..c61e3bd 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -181,49 +181,44 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
- bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ void cvtLdWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ void cvtLdExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ void cvtLdExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ void cvtStExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ void cvtStExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ void cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ void cvtVLDwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ void cvtVLDwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ void cvtVSTwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ void cvtVSTwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
-
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool processInstruction(MCInst &Inst,
@@ -258,15 +253,17 @@ public:
// Implementation of the MCTargetAsmParser interface:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
unsigned checkTargetMatchPredicate(MCInst &Inst);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
};
} // end anonymous namespace
@@ -486,7 +483,8 @@ public:
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
-
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
ARMCC::CondCodes getCondCode() const {
@@ -862,7 +860,7 @@ public:
bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
bool isToken() const { return Kind == k_Token; }
bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
- bool isMemory() const { return Kind == k_Memory; }
+ bool isMem() const { return Kind == k_Memory; }
bool isShifterImm() const { return Kind == k_ShifterImmediate; }
bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
@@ -873,14 +871,14 @@ public:
return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
}
bool isMemNoOffset(bool alignOK = false) const {
- if (!isMemory())
+ if (!isMem())
return false;
// No offset of any kind.
return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
(alignOK || Memory.Alignment == 0);
}
bool isMemPCRelImm12() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base register must be PC.
if (Memory.BaseRegNum != ARM::PC)
@@ -894,7 +892,7 @@ public:
return isMemNoOffset(true);
}
bool isAddrMode2() const {
- if (!isMemory() || Memory.Alignment != 0) return false;
+ if (!isMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return true;
// Immediate offset in range [-4095, 4095].
@@ -916,7 +914,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.Alignment != 0) return false;
+ if (!isMem() || Memory.Alignment != 0) return false;
// No shifts are legal for AM3.
if (Memory.ShiftType != ARM_AM::no_shift) return false;
// Check for register offset.
@@ -946,7 +944,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.Alignment != 0) return false;
+ if (!isMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
// Immediate offset in range [-1020, 1020] and a multiple of 4.
@@ -956,25 +954,25 @@ public:
Val == INT32_MIN;
}
bool isMemTBB() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
return false;
return true;
}
bool isMemTBH() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
Memory.Alignment != 0 )
return false;
return true;
}
bool isMemRegOffset() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+ if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
return false;
return true;
}
bool isT2MemRegOffset() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.Alignment != 0)
return false;
// Only lsl #{0, 1, 2, 3} allowed.
@@ -987,14 +985,14 @@ public:
bool isMemThumbRR() const {
// Thumb reg+reg addressing is simple. Just two registers, a base and
// an offset. No shifts, negations or any other complicating factors.
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
return false;
return isARMLowRegister(Memory.BaseRegNum) &&
(!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
}
bool isMemThumbRIs4() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 124].
@@ -1003,7 +1001,7 @@ public:
return Val >= 0 && Val <= 124 && (Val % 4) == 0;
}
bool isMemThumbRIs2() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 62].
@@ -1012,7 +1010,7 @@ public:
return Val >= 0 && Val <= 62 && (Val % 2) == 0;
}
bool isMemThumbRIs1() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 31].
@@ -1021,7 +1019,7 @@ public:
return Val >= 0 && Val <= 31;
}
bool isMemThumbSPI() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 1020].
@@ -1035,7 +1033,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
@@ -1044,7 +1042,7 @@ public:
return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
}
bool isMemImm0_1020s4Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [0, 1020].
if (!Memory.OffsetImm) return true;
@@ -1052,7 +1050,7 @@ public:
return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
}
bool isMemImm8Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base reg of PC isn't allowed for these encodings.
if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1062,7 +1060,7 @@ public:
return (Val == INT32_MIN) || (Val > -256 && Val < 256);
}
bool isMemPosImm8Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 255].
if (!Memory.OffsetImm) return true;
@@ -1070,7 +1068,7 @@ public:
return Val >= 0 && Val < 256;
}
bool isMemNegImm8Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base reg of PC isn't allowed for these encodings.
if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1080,7 +1078,7 @@ public:
return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 4095].
if (!Memory.OffsetImm) return true;
@@ -1094,7 +1092,7 @@ public:
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-4095, 4095].
if (!Memory.OffsetImm) return true;
@@ -3376,7 +3374,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
StringRef Mask = Tok.getString();
if (isMClass()) {
@@ -3880,8 +3879,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// cvtT2LdrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtT2LdrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt, Rt2
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3892,14 +3891,13 @@ cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
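For orientation, the cvt* converters in this file index the parsed operand list by fixed position. The map below is only an illustration, assuming the layout the parser typically produces for a pre-indexed doubleword load (mnemonic token, condition code, two registers, then a single memory operand); it is not taken from the patch itself:

    // Illustrative operand layout for "ldrd r0, r1, [r2, #8]!":
    //
    //   Operands[0]  token      "ldrd"
    //   Operands[1]  cond code  AL
    //   Operands[2]  register   r0        (Rt)
    //   Operands[3]  register   r1        (Rt2)
    //   Operands[4]  memory     [r2, #8]  (one parsed operand that expands
    //                                      to several MCInst operands)
    //
    // The converter then appends MCInst operands in encoding order:
    // Rt, Rt2, writeback dummy, the expanded address, and the predicate.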
/// cvtT2StrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtT2StrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateReg(0));
@@ -3910,14 +3908,13 @@ cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3926,28 +3923,26 @@ cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3956,14 +3951,13 @@ cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3972,57 +3966,53 @@ cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -4034,14 +4024,13 @@ cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -4053,14 +4042,13 @@ cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4072,14 +4060,13 @@ cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4091,14 +4078,13 @@ cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt, Rt2
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -4109,14 +4095,13 @@ cvtLdrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4127,40 +4112,27 @@ cvtStrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-/// cvtThumbMultiple- Convert parsed operands to MCInst.
+/// cvtThumbMultiply - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // The second source operand must be the same register as the destination
- // operand.
- if (Operands.size() == 6 &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[5])->getReg()) &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[4])->getReg())) {
- Error(Operands[3]->getStartLoc(),
- "destination register must match source register");
- return false;
- }
((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
// If we have a three-operand form, make sure to set Rn to be the operand
@@ -4173,12 +4145,10 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
Inst.addOperand(Inst.getOperand(0));
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
-
- return true;
}
-bool ARMAsmParser::
-cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
@@ -4188,11 +4158,10 @@ cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-bool ARMAsmParser::
-cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
@@ -4204,11 +4173,10 @@ cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-bool ARMAsmParser::
-cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4218,11 +4186,10 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-bool ARMAsmParser::
-cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4234,7 +4201,6 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// Parse an ARM memory expression, return false if successful else return true
@@ -4471,6 +4437,12 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
return Error(Loc, "immediate shift value out of range");
+ // If <ShiftTy> #0, turn it into a no_shift.
+ if (Imm == 0)
+ St = ARM_AM::lsl;
+ // For consistency, treat lsr #32 and asr #32 as having immediate value 0.
+ if (Imm == 32)
+ Imm = 0;
Amount = Imm;
}
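A note on the two normalizations added above: in the shifter-operand encoding an amount field of 0 means "no shift" for lsl and a 32-bit shift for lsr/asr, so the parser folds "<ShiftTy> #0" into lsl and stores lsr/asr #32 as 0. A minimal sketch of that convention, using hypothetical helper names (the printer's translateShiftImm(), further down in this patch, performs the reverse mapping):

    // Hypothetical helpers, only to illustrate the encode/print round trip.
    static unsigned encodeShiftAmount(unsigned Imm) {
      return Imm == 32 ? 0 : Imm;        // lsr/asr #32 are stored as 0
    }
    static unsigned decodeShiftAmount(unsigned Field) {
      return Field == 0 ? 32 : Field;    // mirrors translateShiftImm()
    }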
@@ -4648,7 +4620,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return true;
const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
- getContext());
+ getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
return false;
@@ -4983,7 +4955,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Apply mnemonic aliases before doing anything else, as the destination
// mnemonic may include suffixes and we want to handle them normally.
@@ -5377,6 +5350,25 @@ validateInstruction(MCInst &Inst,
"in register list");
break;
}
+ case ARM::tMUL: {
+ // The second source operand must be the same register as the destination
+ // operand.
+ //
+ // In this case, we must directly check the parsed operands because the
+ // cvtThumbMultiply() function is written in such a way that it guarantees
+ // this first statement is always true for the new Inst. Essentially, the
+ // destination is unconditionally copied into the second source operand
+ // without checking to see if it matches what we actually parsed.
+ if (Operands.size() == 6 &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[5])->getReg()) &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[4])->getReg())) {
+ return Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ }
+ break;
+ }
// Like for ldm/stm, push and pop have hi-reg handling version in Thumb2,
// so only issue a diagnostic for thumb1. The instructions will be
// switched to the t2 encodings in processInstruction() if necessary.
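The tMUL case above takes over the destination/source check that previously lived in cvtThumbMultiply(). At the assembly level it requires the 16-bit Thumb multiply to write back to one of its source registers; expressed as a standalone predicate (a sketch, the name is not from the patch):

    // "muls r0, r1, r0" is accepted; "muls r0, r1, r2" is rejected with the
    // diagnostic shown above.
    static bool isValidThumbMul(unsigned Rd, unsigned Rn, unsigned Rm) {
      return Rd == Rn || Rd == Rm;
    }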
@@ -5678,6 +5670,20 @@ bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
+ // Alias for alternate form of 'ADR Rd, #imm' instruction.
+ case ARM::ADDri: {
+ if (Inst.getOperand(1).getReg() != ARM::PC ||
+ Inst.getOperand(5).getReg() != 0)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADR);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
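The new ADDri case accepts the alternate spelling of ADR: an add of an immediate to pc with no flag-setting suffix, e.g. "add r0, pc, #8", is rebuilt as "adr r0, #8". A simplified restatement of the guard (illustrative only; operand 5 is assumed to be the optional cc_out register):

    static bool looksLikeAdrAlias(const MCInst &Inst) {
      return Inst.getOpcode() == ARM::ADDri &&
             Inst.getOperand(1).getReg() == ARM::PC &&   // Rn is pc
             Inst.getOperand(5).getReg() == 0;           // no 's' suffix
    }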
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
Inst.setOpcode(ARM::t2LDRpci);
@@ -7471,13 +7477,14 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned ErrorInfo;
unsigned MatchResult;
- MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success:
@@ -7540,9 +7547,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction",
((ARMOperand*)Operands[0])->getLocRange());
- case Match_ConversionFail:
- // The converter function will have already emitted a diagnostic.
- return true;
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index ac916cc..377bd92 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_target(ARMCodeGen
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
- ARMELFWriterInfo.cpp
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFrameLowering.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index c90751d..f00142d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -525,8 +525,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
else
ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
const char *ReferenceName;
- const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
- &ReferenceName);
+ uint64_t SymbolValue = 0x00000000ffffffffULL & Value;
+ const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType,
+ Address, &ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = true;
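The masking before the SymbolLookUp call matters because Value is an int32_t while the callback takes a 64-bit value; without it, a negative 32-bit address would be sign-extended into the upper half. A minimal illustration:

    int32_t Value = -4;
    uint64_t SignExtended = Value;                          // 0xfffffffffffffffc
    uint64_t Masked       = 0x00000000ffffffffULL & Value;  // 0x00000000fffffffc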
@@ -1523,6 +1524,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
unsigned amt = fieldFromInstruction(Insn, 7, 5);
+ if (Opc == ARM_AM::ror && amt == 0)
+ Opc = ARM_AM::rrx;
unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1564,6 +1567,9 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
break;
}
+ if (ShOp == ARM_AM::ror && imm == 0)
+ ShOp = ARM_AM::rrx;
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -2089,16 +2095,28 @@ static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
- (fieldFromInstruction(Insn, 11, 1) << 18) |
- (fieldFromInstruction(Insn, 13, 1) << 17) |
- (fieldFromInstruction(Insn, 16, 6) << 11) |
- (fieldFromInstruction(Insn, 26, 1) << 19);
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
+ DecodeStatus Status = MCDisassembler::Success;
+
+ // Note the J1 and J2 values are from the encoded instruction. So here
+ // change them to I1 and I2 values as documented:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = fieldFromInstruction(Insn, 26, 1);
+ unsigned J1 = fieldFromInstruction(Insn, 13, 1);
+ unsigned J2 = fieldFromInstruction(Insn, 11, 1);
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
+ unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
+ unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
+ int imm32 = SignExtend32<24>(tmp << 1);
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
- return S;
+ Inst.addOperand(MCOperand::CreateImm(imm32));
+
+ return Status;
}
static DecodeStatus
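As a standalone sketch of what the rewritten DecodeT2BInstruction computes: per the ARM ARM, the branch offset is SignExtend(S:I1:I2:imm10:imm11:'0'), a 25-bit, halfword-aligned value. This is only an illustration of the bit assembly, not the decoder itself:

    static int32_t thumb2BranchOffset(uint32_t Insn) {
      uint32_t S     = (Insn >> 26) & 0x1;
      uint32_t J1    = (Insn >> 13) & 0x1;
      uint32_t J2    = (Insn >> 11) & 0x1;
      uint32_t imm10 = (Insn >> 16) & 0x3ff;
      uint32_t imm11 =  Insn        & 0x7ff;
      uint32_t I1 = !(J1 ^ S);
      uint32_t I2 = !(J2 ^ S);
      uint32_t imm25 = (S << 24) | (I1 << 23) | (I2 << 22) |
                       (imm10 << 12) | (imm11 << 1);
      return (int32_t)(imm25 << 7) >> 7;   // sign-extend from bit 24
    }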
@@ -2701,6 +2719,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
unsigned align = fieldFromInstruction(Insn, 4, 1);
unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 0 && align == 1)
+ return MCDisassembler::Fail;
align *= (1 << size);
switch (Inst.getOpcode()) {
@@ -2831,6 +2851,8 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
unsigned align = fieldFromInstruction(Insn, 4, 1);
if (size == 0x3) {
+ if (align == 0)
+ return MCDisassembler::Fail;
size = 4;
align = 16;
} else {
@@ -3170,7 +3192,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
int imm = Val & 0xFF;
if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ Inst.addOperand(MCOperand::CreateImm(imm * 4));
}
return MCDisassembler::Success;
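The switch from "imm << 2" to "imm * 4" is not cosmetic: imm can be negative at this point, and left-shifting a negative signed integer is undefined behaviour in C++, whereas the multiplication expresses the intended scaling safely:

    int imm = -5;
    int scaled = imm * 4;    // well-defined: -20
    // int bad = imm << 2;   // undefined behaviour when imm is negative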
@@ -3710,8 +3732,16 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
index = fieldFromInstruction(Insn, 7, 1);
- if (fieldFromInstruction(Insn, 4, 2) != 0)
- align = 4;
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0 :
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -3769,8 +3799,16 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
index = fieldFromInstruction(Insn, 7, 1);
- if (fieldFromInstruction(Insn, 4, 2) != 0)
- align = 4;
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
}
if (Rm != 0xF) { // Writeback
@@ -4090,8 +4128,15 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
inc = 2;
break;
case 2:
- if (fieldFromInstruction(Insn, 4, 2))
- align = 4 << fieldFromInstruction(Insn, 4, 2);
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
index = fieldFromInstruction(Insn, 7, 1);
if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
@@ -4164,8 +4209,15 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
inc = 2;
break;
case 2:
- if (fieldFromInstruction(Insn, 4, 2))
- align = 4 << fieldFromInstruction(Insn, 4, 2);
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
index = fieldFromInstruction(Insn, 7, 1);
if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 8b9109e..dcc41d9 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -29,11 +29,33 @@ using namespace llvm;
///
/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
static unsigned translateShiftImm(unsigned imm) {
+ // lsr #32 and asr #32 exist, but should be encoded as a 0.
+ assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
+
if (imm == 0)
return 32;
return imm;
}
+/// Prints the shift value with an immediate value.
+static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
+ unsigned ShImm, bool UseMarkup) {
+ if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
+ return;
+ O << ", ";
+
+ assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ O << getShiftOpcStr(ShOpc);
+
+ if (ShOpc != ARM_AM::rrx) {
+ O << " ";
+ if (UseMarkup)
+ O << "<imm:";
+ O << "#" << translateShiftImm(ShImm);
+ if (UseMarkup)
+ O << ">";
+ }
+}
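Roughly, the new printRegImmShift() helper produces the following for a (shift opcode, immediate field) pair; the markup form assumes the "<imm:...>" annotation style used throughout this patch:

    //   (lsl, 0)  ->  nothing printed (no shift)
    //   (asr, 5)  ->  ", asr #5"    or  ", asr <imm:#5>"   with markup
    //   (lsr, 0)  ->  ", lsr #32"   or  ", lsr <imm:#32>"  with markup
    //   (rrx, 0)  ->  ", rrx"
    //   (ror, 0)  ->  asserts ("Cannot have ror #0")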
ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
@@ -45,7 +67,9 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
}
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ OS << markup("<reg:")
+ << getRegisterName(RegNo)
+ << markup(">");
}
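The markup() calls introduced throughout the printer appear to follow the MCInstPrinter convention of emitting their argument only when markup output is enabled (and, for the two-argument form used later in this file, falling back to the second string otherwise), so plain output is unchanged. A sketch of the assumed behaviour and the resulting annotated text:

    // Assumed semantics (not part of this patch):
    //   markup("<reg:")     -> "<reg:"  with markup enabled, ""  otherwise
    //   markup(">]>", "]")  -> ">]>"    with markup enabled, "]" otherwise
    //
    // So a load prints as
    //   ldr r0, [r1, #4]                            (plain)
    //   ldr <reg:r0>, <mem:[<reg:r1>, <imm:#4>]>    (with markup)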
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -85,10 +109,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printSBitModifierOperand(MI, 6, O);
printPredicateOperand(MI, 4, O);
- O << '\t' << getRegisterName(Dst.getReg())
- << ", " << getRegisterName(MO1.getReg());
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
- O << ", " << getRegisterName(MO2.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
printAnnotation(O, Annot);
return;
@@ -104,15 +131,20 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printSBitModifierOperand(MI, 5, O);
printPredicateOperand(MI, 3, O);
- O << '\t' << getRegisterName(Dst.getReg())
- << ", " << getRegisterName(MO1.getReg());
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
printAnnotation(O, Annot);
return;
}
- O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ O << ", "
+ << markup("<imm:")
+ << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
+ << markup(">");
printAnnotation(O, Annot);
return;
}
@@ -136,7 +168,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
MI->getOperand(3).getImm() == -4) {
O << '\t' << "push";
printPredicateOperand(MI, 4, O);
- O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}";
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
printAnnotation(O, Annot);
return;
}
@@ -159,7 +193,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
MI->getOperand(4).getImm() == 4) {
O << '\t' << "pop";
printPredicateOperand(MI, 5, O);
- O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}";
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
printAnnotation(O, Annot);
return;
}
@@ -198,7 +234,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\tldm";
printPredicateOperand(MI, 1, O);
- O << '\t' << getRegisterName(BaseReg);
+ O << '\t';
+ printRegName(O, BaseReg);
if (Writeback) O << "!";
O << ", ";
printRegisterList(MI, 3, O);
@@ -224,9 +261,11 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
- O << getRegisterName(Reg);
+ printRegName(O, Reg);
} else if (Op.isImm()) {
- O << '#' << Op.getImm();
+ O << markup("<imm:")
+ << '#' << Op.getImm()
+ << markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
// If a symbolic branch target was added as a constant expression then print
@@ -244,13 +283,16 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
if (MO1.isExpr())
O << *MO1.getExpr();
- else if (MO1.isImm())
- O << "[pc, #" << MO1.getImm() << "]";
+ else if (MO1.isImm()) {
+ O << markup("<mem:") << "[pc, "
+ << markup("<imm:") << "#" << MO1.getImm()
+ << markup(">]>", "]");
+ }
else
llvm_unreachable("Unknown LDR label operand?");
}
@@ -266,7 +308,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
- O << getRegisterName(MO1.getReg());
+ printRegName(O, MO1.getReg());
// Print the shift opc.
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
@@ -274,7 +316,8 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
if (ShOpc == ARM_AM::rrx)
return;
- O << ' ' << getRegisterName(MO2.getReg());
+ O << ' ';
+ printRegName(O, MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
}
@@ -283,14 +326,11 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << getRegisterName(MO1.getReg());
+ printRegName(O, MO1.getReg());
// Print the shift opc.
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
- O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
- if (ShOpc == ARM_AM::rrx)
- return;
- O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
@@ -304,67 +344,51 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
- if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
- O << ", #"
+ if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm());
- O << "]";
- return;
- }
-
- O << ", "
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << getRegisterName(MO2.getReg());
-
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
- << " #" << ShImm;
- O << "]";
-}
-
-void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
-
- O << "[" << getRegisterName(MO1.getReg()) << "], ";
-
- if (!MO2.getReg()) {
- unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ImmOffs;
+ << ARM_AM::getAM2Offset(MO3.getImm())
+ << markup(">");
+ }
+ O << "]" << markup(">");
return;
}
- O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << getRegisterName(MO2.getReg());
+ O << ", ";
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
- << " #" << ShImm;
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
+ ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg()) << ", "
- << getRegisterName(MO2.getReg()) << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg()) << ", "
- << getRegisterName(MO2.getReg()) << ", lsl #1]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
@@ -376,13 +400,13 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
return;
}
+#ifndef NDEBUG
const MCOperand &MO3 = MI->getOperand(Op+2);
unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+ assert(IdxMode != ARMII::IndexModePost &&
+ "Should be pre or offset index op");
+#endif
- if (IdxMode == ARMII::IndexModePost) {
- printAM2PostIndexOp(MI, Op, O);
- return;
- }
printAM2PreOrOffsetIndexOp(MI, Op, O);
}
@@ -394,19 +418,18 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs;
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs
+ << markup(">");
return;
}
- O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << getRegisterName(MO1.getReg());
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
- << " #" << ShImm;
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()),
+ ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup);
}
//===--------------------------------------------------------------------===//
@@ -419,18 +442,22 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- O << "[" << getRegisterName(MO1.getReg()) << "], ";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "], " << markup(">");
if (MO2.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO3.getImm())
- << getRegisterName(MO2.getReg());
+ O << (char)ARM_AM::getAM3Op(MO3.getImm());
+ printRegName(O, MO2.getReg());
return;
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
- O << '#'
+ O << markup("<imm:")
+ << '#'
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << ImmOffs;
+ << ImmOffs
+ << markup(">");
}
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
@@ -439,23 +466,29 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- O << '[' << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << '[';
+ printRegName(O, MO1.getReg());
if (MO2.getReg()) {
- O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << getRegisterName(MO2.getReg()) << ']';
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
+ O << ']' << markup(">");
return;
}
- //If the op is sub we have to print the immediate even if it is 0
+ //If the op is sub we have to print the immediate even if it is 0
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
-
- if (ImmOffs || (op == ARM_AM::sub))
- O << ", #"
+
+ if (ImmOffs || (op == ARM_AM::sub)) {
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(op)
- << ImmOffs;
- O << ']';
+ << ImmOffs
+ << markup(">");
+ }
+ O << ']' << markup(">");
}
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
@@ -483,15 +516,15 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
if (MO1.getReg()) {
- O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
- << getRegisterName(MO1.getReg());
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
return;
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
- << ImmOffs;
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ << markup(">");
}
void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
@@ -499,7 +532,9 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ << markup(">");
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
@@ -507,7 +542,8 @@ void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg());
+ O << (MO2.getImm() ? "" : "-");
+ printRegName(O, MO1.getReg());
}
void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
@@ -515,7 +551,9 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ << markup(">");
}
@@ -536,16 +574,20 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
if (ImmOffs || Op == ARM_AM::sub) {
- O << ", #"
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
- << ImmOffs * 4;
+ << ImmOffs * 4
+ << markup(">");
}
- O << "]";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
@@ -553,18 +595,21 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
if (MO2.getImm()) {
// FIXME: Both darwin as and GNU as violate ARM docs here.
O << ", :" << (MO2.getImm() << 3);
}
- O << "]";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- O << "[" << getRegisterName(MO1.getReg()) << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
@@ -573,8 +618,10 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.getReg() == 0)
O << "!";
- else
- O << ", " << getRegisterName(MO.getReg());
+ else {
+ O << ", ";
+ printRegName(O, MO.getReg());
+ }
}
void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
@@ -585,7 +632,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
int32_t lsb = CountTrailingZeros_32(v);
int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << '#' << lsb << ", #" << width;
+ O << markup("<imm:") << '#' << lsb << markup(">")
+ << ", "
+ << markup("<imm:") << '#' << width << markup(">");
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
@@ -599,10 +648,18 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
- if (isASR)
- O << ", asr #" << (Amt == 0 ? 32 : Amt);
- else if (Amt)
- O << ", lsl #" << Amt;
+ if (isASR) {
+ O << ", asr "
+ << markup("<imm:")
+ << "#" << (Amt == 0 ? 32 : Amt)
+ << markup(">");
+ }
+ else if (Amt) {
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << Amt
+ << markup(">");
+ }
}
void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
@@ -611,7 +668,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
return;
assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
- O << ", lsl #" << Imm;
+ O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
@@ -621,7 +678,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
Imm = 32;
assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
- O << ", asr #" << Imm;
+ O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -629,7 +686,7 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
if (i != OpNum) O << ", ";
- O << getRegisterName(MI->getOperand(i).getReg());
+ printRegName(O, MI->getOperand(i).getReg());
}
O << "}";
}
@@ -803,23 +860,29 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
int32_t OffImm = (int32_t)MO.getImm();
+ O << markup("<imm:");
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
+ O << markup(">");
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << MI->getOperand(OpNum).getImm() * 4;
+ O << markup("<imm:")
+ << "#" << MI->getOperand(OpNum).getImm() * 4
+ << markup(">");
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << "#" << (Imm == 0 ? 32 : Imm);
+ O << markup("<imm:")
+ << "#" << (Imm == 0 ? 32 : Imm)
+ << markup(">");
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
@@ -849,10 +912,13 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
- if (unsigned RegNum = MO2.getReg())
- O << ", " << getRegisterName(RegNum);
- O << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned RegNum = MO2.getReg()) {
+ O << ", ";
+ printRegName(O, RegNum);
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
@@ -867,10 +933,15 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
- if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs * Scale;
- O << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << ImmOffs * Scale
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
@@ -906,14 +977,12 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
unsigned Reg = MO1.getReg();
- O << getRegisterName(Reg);
+ printRegName(O, Reg);
// Print the shift opc.
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
- O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
- if (ShOpc != ARM_AM::rrx)
- O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
@@ -926,18 +995,27 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
bool isSub = OffImm < 0;
// Special value for #-0. All others are normal.
if (OffImm == INT32_MIN)
OffImm = 0;
- if (isSub)
- O << ", #-" << -OffImm;
- else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
+ if (isSub) {
+ O << ", "
+ << markup("<imm:")
+ << "#-" << -OffImm
+ << markup(">");
+ }
+ else if (OffImm > 0) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << OffImm
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
@@ -946,17 +1024,24 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
// Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
if (OffImm == INT32_MIN)
- O << ", #-0";
+ O << "#-0";
else if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
@@ -970,20 +1055,27 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
// Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
if (OffImm == INT32_MIN)
- O << ", #-0";
+ O << "#-0";
else if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
@@ -992,10 +1084,15 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << getRegisterName(MO1.getReg());
- if (MO2.getImm())
- O << ", #" << MO2.getImm() * 4;
- O << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (MO2.getImm()) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << MO2.getImm() * 4
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
@@ -1003,11 +1100,12 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
- // Don't print +0.
+ O << ", " << markup("<imm:");
if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else
- O << ", #" << OffImm;
+ O << "#" << OffImm;
+ O << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
@@ -1019,12 +1117,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
// Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
if (OffImm == INT32_MIN)
- O << ", #-0";
+ O << "#-0";
else if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm;
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
@@ -1034,23 +1138,30 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
assert(MO2.getReg() && "Invalid so_reg load / store address!");
- O << ", " << getRegisterName(MO2.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl #" << ShAmt;
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << ShAmt
+ << markup(">");
}
- O << "]";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << '#' << ARM_AM::getFPImmFloat(MO.getImm());
+ O << markup("<imm:")
+ << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ << markup(">");
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1058,14 +1169,18 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << "#0x";
+ O << markup("<imm:")
+ << "#0x";
O.write_hex(Val);
+ O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << "#" << Imm + 1;
+ O << markup("<imm:")
+ << "#" << Imm + 1
+ << markup(">");
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1073,23 +1188,30 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
return;
- O << ", ror #";
+ O << ", ror "
+ << markup("<imm:")
+ << "#";
switch (Imm) {
default: assert (0 && "illegal ror immediate!");
case 1: O << "8"; break;
case 2: O << "16"; break;
case 3: O << "24"; break;
}
+ O << markup(">");
}
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << 16 - MI->getOperand(OpNum).getImm();
+ O << markup("<imm:")
+ << "#" << 16 - MI->getOperand(OpNum).getImm()
+ << markup(">");
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << 32 - MI->getOperand(OpNum).getImm();
+ O << markup("<imm:")
+ << "#" << 32 - MI->getOperand(OpNum).getImm()
+ << markup(">");
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
@@ -1099,7 +1221,9 @@ void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "}";
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
@@ -1107,7 +1231,11 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
- O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
}
void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
@@ -1116,7 +1244,11 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
- O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
}
void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
@@ -1124,9 +1256,13 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "}";
}
void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
@@ -1134,16 +1270,23 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "}";
}
void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[]}";
}
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
@@ -1152,7 +1295,11 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
- O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@@ -1161,9 +1308,13 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
@@ -1172,10 +1323,15 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
@@ -1184,7 +1340,11 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
- O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
@@ -1193,9 +1353,13 @@ void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
@@ -1204,10 +1368,15 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
@@ -1216,9 +1385,13 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "}";
}
void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
@@ -1227,8 +1400,13 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "}";
}
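
Note: the ARMInstPrinter.cpp changes above route every register through printRegName() and wrap memory and immediate operands in markup() calls. A minimal sketch of the helper these calls appear to rely on, assuming the same UseMarkup flag the hunks test directly, plus the kind of annotated output it would yield (illustration only, not part of the patch):

    // Sketch only: markup() passes its tag through when markup output is
    // enabled and collapses to an empty string otherwise, so the plain
    // assembly text is unchanged when markup is off.
    StringRef markup(StringRef Tag) const {
      return UseMarkup ? Tag : "";
    }

    // With markup enabled, "ldr r0, [r1, #4]" would come out roughly as
    //   ldr <reg:r0>, <mem:[<reg:r1>, <imm:#4>]>
    // where the <reg:...> wrapping is assumed to come from printRegName().
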
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 73d7bfd..b7bab5f 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -126,7 +126,8 @@ public:
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index ac6ce64..1ba6ab0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -593,7 +593,9 @@ public:
const object::mach::CPUSubtypeARM Subtype;
DarwinARMAsmBackend(const Target &T, const StringRef TT,
object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T, TT), Subtype(st) { }
+ : ARMAsmBackend(T, TT), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
@@ -674,7 +676,7 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
} // end anonymous namespace
-MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
@@ -687,6 +689,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ else if (TheTriple.getArchName() == "armv7f" ||
+ TheTriple.getArchName() == "thumbv7f")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
+ else if (TheTriple.getArchName() == "armv7k" ||
+ TheTriple.getArchName() == "thumbv7k")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
+ else if (TheTriple.getArchName() == "armv7s" ||
+ TheTriple.getArchName() == "thumbv7s")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
}
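
Note: the DarwinARMAsmBackend hunk above keys the Mach-O CPU subtype off the triple's architecture name and now flags data-in-code support. One equivalent way to express the new armv7f/armv7k/armv7s dispatch, shown only as a sketch (the patch itself uses the if/else chain above, and this assumes llvm/ADT/StringSwitch.h is available):

    object::mach::CPUSubtypeARM Subtype =
        StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
            .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
            .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
            .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
            .Default(object::mach::CSARM_V7);
    return new DarwinARMAsmBackend(T, TT, Subtype);
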
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 7d6acbc..99e4f71 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -194,6 +194,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ Type = ELF::R_ARM_THM_JUMP24;
+ break;
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
Type = ELF::R_ARM_MOVT_PREL;
@@ -242,6 +246,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_TARGET1:
Type = ELF::R_ARM_TARGET1;
break;
+ case MCSymbolRefExpr::VK_ARM_TARGET2:
+ Type = ELF::R_ARM_TARGET2;
+ break;
}
break;
case ARM::fixup_arm_ldst_pcrel_12:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index d32805e..c1aab9c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -50,7 +50,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Code32Directive = ".code\t32";
WeakRefDirective = "\t.weak\t";
- LCOMMDirectiveType = LCOMM::NoAlignment;
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 94f1082..d0e127a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -35,8 +35,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
const MCContext &CTX;
@@ -783,7 +783,7 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (Imm8 < 0)
- Imm8 = -Imm8;
+ Imm8 = -(uint32_t)Imm8;
// Scaled by 4.
Imm8 /= 4;
@@ -934,6 +934,10 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
unsigned SBits = getShiftOp(ShOp);
+ // While "lsr #32" and "asr #32" exist, they are encoded with a 0 in the shift
+ // amount. However, it would be an easy mistake to make so check here.
+ assert((ShImm & ~0x1f) == 0 && "Out of range shift amount");
+
// {16-13} = Rn
// {12} = isAdd
// {11-0} = shifter
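
Note: the getT2Imm8s4OpValue hunk above changes the negation to go through uint32_t. The likely motivation (not stated in the patch) is that negating the most negative int32_t value is undefined behaviour in signed C++ arithmetic, whereas unsigned negation is defined to wrap:

    int32_t Imm8 = INT32_MIN;
    // Imm8 = -Imm8;            // undefined: INT32_MIN has no positive counterpart
    Imm8 = -(uint32_t)Imm8;     // defined: unsigned negation wraps modulo 2^32
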
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index a727e08..b404e6c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -28,7 +28,7 @@ private:
explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
: Kind(_Kind), Expr(_Expr) {}
-
+
public:
/// @name Construction
/// @{
@@ -67,9 +67,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
-
- static bool classof(const ARMMCExpr *) { return true; }
-
};
} // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 5df84c8..00ffc94 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -71,6 +71,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
else
// Use CPU to figure out the exact features.
ARMArchFeature = "+v7";
+ } else if (Len >= Idx+2 && TT[Idx+1] == 's') {
+ if (NoCPU)
+ // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ // Swift
+ ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
} else {
// v7 CPUs have lots of different feature sets. If no CPU is specified,
// then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
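
Note: with the ParseARMTriple hunk above, an armv7s/thumbv7s triple with no explicit CPU defaults to a Swift-flavoured feature string, while a named CPU keeps only "+v7" and lets the CPU definition supply the remaining features. Illustratively (assumed from the hunk, not verified output):

    // ParseARMTriple("armv7s-apple-ios", "")      -> "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk"
    // ParseARMTriple("armv7s-apple-ios", "swift") -> "+v7"
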
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 510302d..a89981e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -46,7 +46,7 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU);
/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index a51e0fa..2154c93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -41,6 +41,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue);
+ bool requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType, const MCSymbolData *SD,
+ uint64_t FixedValue);
+
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
@@ -305,6 +311,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
Writer->addRelocation(Fragment->getParent(), MRE);
}
+bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType,
+ const MCSymbolData *SD,
+ uint64_t FixedValue) {
+ // Most cases can be identified purely from the symbol.
+ if (Writer->doesSymbolRequireExternRelocation(SD))
+ return true;
+ int64_t Value = (int64_t)FixedValue; // The displacement is signed.
+ int64_t Range;
+ switch (RelocType) {
+ default:
+ return false;
+ case macho::RIT_ARM_Branch24Bit:
+ // PC pre-adjustment of 8 for these instructions.
+ Value -= 8;
+ // ARM BL/BLX has a 25-bit offset.
+ Range = 0x1ffffff;
+ break;
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ // PC pre-adjustment of 4 for these instructions.
+ Value -= 4;
+ // Thumb BL/BLX has a 24-bit offset.
+ Range = 0xffffff;
+ }
+ // BL/BLX also use external relocations when an internal relocation
+ // would result in the target being out of range. This gives the linker
+ // enough information to generate a branch island.
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Value += Writer->getSectionAddress(&SymSD);
+ Value -= Writer->getSectionAddress(Fragment.getParent());
+ // If the resultant value would be out of range for an internal relocation,
+ // use an external instead.
+ if (Value > Range || Value < -(Range + 1))
+ return true;
+ return false;
+}
+
void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
@@ -373,7 +419,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
+ FixedValue)) {
IsExtern = 1;
Index = SD->getIndex();
@@ -410,7 +457,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
if (Type == macho::RIT_ARM_Half) {
// The other-half value only gets populated for the movt and movw
// relocation entries.
- uint32_t Value = 0;;
+ uint32_t Value = 0;
switch ((unsigned)Fixup.getKind()) {
default: break;
case ARM::fixup_arm_movw_lo16:
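
Note: requiresExternRelocation, added earlier in the ARMMachObjectWriter.cpp diff, forces an external relocation when a BL/BLX target would not fit the branch encoding, so the linker has enough information to emit a branch island. A hedged sketch of the arithmetic for the ARM (non-Thumb) case, with SectionAddrOfTarget and SectionAddrOfFixup standing in for the two getSectionAddress() calls in the hunk:

    int64_t Value = (int64_t)FixedValue - 8;            // ARM PC reads 8 bytes ahead
    int64_t Range = 0x1ffffff;                          // BL/BLX reach, per the hunk
    Value += SectionAddrOfTarget - SectionAddrOfFixup;  // displacement across sections
    bool NeedsExtern = Value > Range || Value < -(Range + 1);
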
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index ad60e32..70643bc 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -51,7 +51,8 @@ namespace {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
- bool isA9;
+ bool isLikeA9;
+ bool isSwift;
unsigned MIIdx;
MachineInstr* LastMIs[4];
SmallPtrSet<MachineInstr*, 4> IgnoreStall;
@@ -60,6 +61,7 @@ namespace {
void pushStack(MachineInstr *MI);
MachineInstr *getAccDefMI(MachineInstr *MI) const;
unsigned getDefReg(MachineInstr *MI) const;
+ bool hasLoopHazard(MachineInstr *MI) const;
bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
bool FindMLxHazard(MachineInstr *MI);
void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
@@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
return Reg;
}
+/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
+/// a single-MBB loop.
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+outer_continue:
+ if (DefMI->getParent() != MBB)
+ break;
+
+ if (DefMI->isPHI()) {
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ if (DefMI->getOperand(i + 1).getMBB() == MBB) {
+ unsigned SrcReg = DefMI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
+ goto outer_continue;
+ }
+ }
+ }
+ } else if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return DefMI == MI;
+}
+
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
@@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
return false;
}
+static bool isFpMulInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VMULS:
+ case ARM::VMULfd:
+ case ARM::VMULfq:
+ case ARM::VMULD:
+ case ARM::VMULslfd:
+ case ARM::VMULslfq:
+ return true;
+ default:
+ return false;
+ }
+}
bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
if (NumExpand >= ExpandLimit)
@@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
return true;
}
+ // On Swift, we mostly care about hazards from multiplication instructions
+ // writing the accumulator and the pipelining of loop iterations by out-of-
+ // order execution.
+ if (isSwift)
+ return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
+
if (IgnoreStall.count(MI))
return false;
@@ -179,8 +244,8 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
// preserves the in-order retirement of the instructions.
// Look at the next few instructions, if *most* of them can cause hazards,
// then the scheduler can't *fix* this, we'd better break up the VMLA.
- unsigned Limit1 = isA9 ? 1 : 4;
- unsigned Limit2 = isA9 ? 1 : 4;
+ unsigned Limit1 = isLikeA9 ? 1 : 4;
+ unsigned Limit2 = isLikeA9 ? 1 : 4;
for (unsigned i = 1; i <= 4; ++i) {
int Idx = ((int)MIIdx - i + 4) % 4;
MachineInstr *NextMI = LastMIs[Idx];
@@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
- isA9 = STI->isCortexA9();
+ isLikeA9 = STI->isLikeA9() || STI->isSwift();
+ isSwift = STI->isSwift();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;