summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td525
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp189
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp971
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h87
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp82
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp429
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.h13
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp181
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp370
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp240
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFeatures.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp532
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp426
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp1823
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h51
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td613
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td587
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td157
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td810
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td307
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp686
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h39
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp321
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h40
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp154
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp36
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h69
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp57
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.h24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp284
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td74
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA57.td1471
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td323
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td52
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleM3.td21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleR52.td207
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp39
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp114
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h133
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp281
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp51
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h12
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h48
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp494
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp139
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp9
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp194
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h24
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp52
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp190
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h84
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp65
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp32
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp188
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp21
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h21
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp168
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp53
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp21
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp95
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp90
-rw-r--r--contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp48
89 files changed, 10433 insertions, 4929 deletions
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index 89859ba..8640c87 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -427,13 +427,11 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
unsigned Lane, bool QPR) {
unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
&ARM::DPRRegClass);
- AddDefaultPred(BuildMI(MBB,
- InsertBefore,
- DL,
- TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
- Out)
- .addReg(Reg)
- .addImm(Lane));
+ BuildMI(MBB, InsertBefore, DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
+ .addReg(Reg)
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
return Out;
}
@@ -476,13 +474,11 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
const DebugLoc &DL, unsigned Ssub0,
unsigned Ssub1) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
- AddDefaultPred(BuildMI(MBB,
- InsertBefore,
- DL,
- TII->get(ARM::VEXTd32), Out)
- .addReg(Ssub0)
- .addReg(Ssub1)
- .addImm(1));
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
return Out;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index be30482..4676226 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -16,21 +16,24 @@
#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
-#include "ARMBasicBlockInfo.h"
#include <functional>
+#include <vector>
namespace llvm {
class ARMAsmPrinter;
class ARMBaseTargetMachine;
+class ARMRegisterBankInfo;
+class ARMSubtarget;
+struct BasicBlockInfo;
class Function;
class FunctionPass;
-class ImmutablePass;
+class InstructionSelector;
+class MachineBasicBlock;
+class MachineFunction;
class MachineInstr;
class MCInst;
class PassRegistry;
-class TargetLowering;
-class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -43,6 +46,9 @@ FunctionPass *createThumb2ITBlockPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass(
std::function<bool(const Function &)> Ftor = nullptr);
+InstructionSelector *
+createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI);
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -53,7 +59,8 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+void initializeARMConstantIslandsPass(PassRegistry &);
-} // end namespace llvm;
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARM_H
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 2a090fa..e49c1ba 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -17,144 +17,172 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// ARM Helper classes.
+// ARM Subtarget state.
//
-class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode",
+ "true", "Thumb mode">;
+
+def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
+ "true", "Use software floating "
+ "point features.">;
-class Architecture<string fname, string aname, list<SubtargetFeature> features >
- : SubtargetFeature<fname, "ARMArch", aname,
- !strconcat(aname, " architecture"), features>;
//===----------------------------------------------------------------------===//
-// ARM Subtarget state.
+// ARM Subtarget features.
//
-def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
- "Thumb mode">;
+// Floating Point, HW Division and Neon Support
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
-def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software floating point features.">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
-//===----------------------------------------------------------------------===//
-// ARM Subtarget features.
-//
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision "
+ "floating point">;
+
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
+
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
+ "true", "Enable ARMv8 FP",
+ [FeatureVFP4]>;
+
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Enable full half-precision "
+ "floating point",
+ [FeatureFPARMv8]>;
+
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports "
+ "single precision only">;
+
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict FP to 16 double registers">;
+
+def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
+ "HasHardwareDivideInThumb", "true",
+ "Enable divide instructions in Thumb">;
+
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
+
+// Atomic Support
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb/dsb) instructions">;
+
+def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
+ "Has v7 clrex instruction">;
-def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
- "Enable VFP2 instructions">;
-def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
- "Enable VFP3 instructions",
- [FeatureVFP2]>;
-def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
- "Enable NEON instructions",
- [FeatureVFP3]>;
-def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
- "Enable Thumb2 instructions">;
-def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
- "Does not support ARM mode execution",
- [ModeThumb]>;
-def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
- "Enable half-precision floating point">;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3, FeatureFP16]>;
-def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
- "true", "Enable ARMv8 FP",
- [FeatureVFP4]>;
-def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
- "Enable full half-precision floating point",
- [FeatureFPARMv8]>;
-def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict FP to 16 double registers">;
-def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
- "Enable divide instructions">;
-def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
- "HasHardwareDivideInARM", "true",
- "Enable divide instructions in ARM mode">;
-def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
- "Enable Thumb2 extract and pack instructions">;
-def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
- "Has data barrier (dmb / dsb) instructions">;
-def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
- "Has v7 clrex instruction">;
def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
"HasAcquireRelease", "true",
- "Has v8 acquire/release (lda/ldaex etc) instructions">;
-def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
- "FP compare + branch is slow">;
-def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
- "Floating point unit supports single precision only">;
-def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
- "Enable support for Performance Monitor extensions">;
-def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
- "Enable support for TrustZone security extensions">;
-def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
- "Enable support for ARMv8-M Security Extensions">;
-def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
- "Enable support for Cryptography extensions",
- [FeatureNEON]>;
-def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
- "Enable support for CRC instructions">;
+ "Has v8 acquire/release (lda/ldaex "
+ " etc) instructions">;
+
+
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable support for Performance "
+ "Monitor extensions">;
+
+
+// TrustZone Security Extensions
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+ "Enable support for TrustZone "
+ "security extensions">;
+
+def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
+ "Enable support for ARMv8-M "
+ "Security Extensions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable support for "
+ "Cryptography extensions",
+ [FeatureNEON]>;
+
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable support for CRC instructions">;
+
+
// Not to be confused with FeatureHasRetAddrStack (return address stack)
-def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
- "Enable Reliability, Availability and Serviceability extensions">;
-def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
- "Enable fast computation of positive address offsets">;
+def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Enable Reliability, Availability "
+ "and Serviceability extensions">;
+// Fast computation of non-negative address offsets
+def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
+ "Enable fast computation of "
+ "positive address offsets">;
-// Cyclone has preferred instructions for zeroing VFP registers, which can
-// execute in 0 cycles.
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions">;
+// Fast execution of AES crypto operations
+def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
-// Whether or not it may be profitable to unpredicate certain instructions
-// during if conversion.
+// Cyclone can zero VFP registers in 0 cycles.
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+ "Has zero-cycle zeroing instructions">;
+
+// Whether it is profitable to unpredicate certain instructions during if-conversion
def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
- "IsProfitableToUnpredicate",
- "true",
+ "IsProfitableToUnpredicate", "true",
"Is profitable to unpredicate">;
// Some targets (e.g. Swift) have microcoded VGETLNi32.
-def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
- "HasSlowVGETLNi32", "true",
- "Has slow VGETLNi32 - prefer VMOV">;
+def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
+ "HasSlowVGETLNi32", "true",
+ "Has slow VGETLNi32 - prefer VMOV">;
// Some targets (e.g. Swift) have microcoded VDUP32.
-def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true",
- "Has slow VDUP32 - prefer VMOV">;
+def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32",
+ "true",
+ "Has slow VDUP32 - prefer VMOV">;
// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
// for scalar FP, as this allows more effective execution domain optimization.
-def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
- "true", "Prefer VMOVSR">;
+def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
+ "true", "Prefer VMOVSR">;
// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
// than ISH
def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
- "true", "Prefer ISHST barriers">;
+ "true", "Prefer ISHST barriers">;
// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
-def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true",
- "Has muxed AGU and NEON/FPU">;
+def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits",
+ "true",
+ "Has muxed AGU and NEON/FPU">;
-// On some targets, a VLDM/VSTM starting with an odd register number needs more
-// microops than single VLDRS.
+// Whether VLDM/VSTM starting with odd register number need more microops
+// than single VLDRS
def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
- "true", "VLDM/VSTM starting with an odd register is slow">;
+ "true", "VLDM/VSTM starting "
+ "with an odd register is slow">;
// Some targets have a renaming dependency when loading into D subregisters.
def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
"Don't widen VMOVS to VMOVD">;
// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
-def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true",
- "Expand VFP/NEON MLA/MLS instructions">;
+def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
+ "ExpandMLx", "true",
+ "Expand VFP/NEON MLA/MLS instructions">;
// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
@@ -162,15 +190,18 @@ def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
// VFP to NEON, as an execution domain optimization.
-def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
- "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">;
+def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
+ "UseNEONForFPMovs", "true",
+ "Convert VMOVSR, VMOVRS, "
+ "VMOVS to NEON">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations. This affects instruction selection and should
// only be enabled if the handling of denormals is not important.
-def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
- "true",
- "Use NEON for single precision FP">;
+def FeatureNEONForFP : SubtargetFeature<"neonfp",
+ "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
// On some processors, VLDn instructions that access unaligned data take one
// extra cycle. Take that into account when computing operand latencies.
@@ -181,18 +212,18 @@ def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
// Some processors have a nonpipelined VFP coprocessor.
def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
"NonpipelinedVFP", "true",
- "VFP instructions are not pipelined">;
+ "VFP instructions are not pipelined">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
-def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
- "Disable VFP / NEON MAC instructions">;
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
- "HasVMLxForwarding", "true",
- "Has multiplier accumulator forwarding">;
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
// Disable 32-bit to 16-bit narrowing for experimentation.
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
@@ -206,62 +237,105 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
-def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
- "AvoidMOVsShifterOperand", "true",
- "Avoid movs instructions with shifter operand">;
+/// Disable +1 predication cost for instructions updating CPSR.
+/// Enabled for Cortex-A57.
+def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
+ "CheapPredicableCPSRDef",
+ "true",
+ "Disable +1 predication cost for instructions updating CPSR">;
+
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with "
+ "shifter operand">;
// Some processors perform return stack prediction. CodeGen should avoid issue
// "normal" call instructions to callees which do not return.
-def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true",
- "Has return address stack">;
+def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack",
+ "HasRetAddrStack", "true",
+ "Has return address stack">;
+
+// Some processors have no branch predictor, which changes the expected cost of
+// taking a branch which affects the choice of whether to use predicated
+// instructions.
+def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
+ "HasBranchPredictor", "false",
+ "Has no branch predictor">;
/// DSP extension.
-def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
- "Supports DSP instructions in ARM and/or Thumb2">;
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
+ "Supports DSP instructions in "
+ "ARM and/or Thumb2">;
// Multiprocessing extension.
-def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
- "Supports Multiprocessing extension">;
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8).
def FeatureVirtualization : SubtargetFeature<"virtualization",
- "HasVirtualization", "true",
- "Supports Virtualization extension",
- [FeatureHWDiv, FeatureHWDivARM]>;
+ "HasVirtualization", "true",
+ "Supports Virtualization extension",
+ [FeatureHWDivThumb, FeatureHWDivARM]>;
-// M-series ISA
-def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
- "Is microcontroller profile ('M' series)">;
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
-// R-series ISA
-def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
- "Is realtime profile ('R' series)">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
+def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
+ "Generate calls via indirect call "
+ "instructions">;
+
+def FeatureExecuteOnly : SubtargetFeature<"execute-only",
+ "GenExecuteOnly", "true",
+ "Enable the generation of "
+ "execute only code.">;
+
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+ "Reserve R9, making it unavailable"
+ " as GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+ "Don't use movt/movw pairs for "
+ "32-bit imms">;
+
+def FeatureNoNegativeImmediates
+ : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM architecture class
+//
// A-series ISA
def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
"Is application profile ('A' series)">;
-// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
-// See ARMInstrInfo.td for details.
-def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
- "NaCl trap">;
+// R-series ISA
+def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
+ "Is realtime profile ('R' series)">;
-def FeatureStrictAlign : SubtargetFeature<"strict-align",
- "StrictAlign", "true",
- "Disallow all unaligned memory "
- "access">;
+// M-series ISA
+def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
+ "Is microcontroller profile ('M' series)">;
-def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
- "Generate calls via indirect call "
- "instructions">;
-def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
- "Reserve R9, making it unavailable as "
- "GPR">;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
-def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
- "Don't use movt/movw pairs for 32-bit "
- "imms">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution",
+ [ModeThumb]>;
//===----------------------------------------------------------------------===//
@@ -270,44 +344,57 @@ def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
+
def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
"Support ARM v5T instructions",
[HasV4TOps]>;
+
def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
- "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ "Support ARM v5TE, v5TEj, and "
+ "v5TExp instructions",
[HasV5TOps]>;
+
def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
"Support ARM v6 instructions",
[HasV5TEOps]>;
+
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+
def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true",
"Support ARM v8M Baseline instructions",
[HasV6MOps]>;
+
def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
"Support ARM v6k instructions",
[HasV6Ops]>;
+
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
[HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>;
+
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon,
FeatureV7Clrex]>;
+
+def HasV8MMainlineOps :
+ SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true",
+ "Support ARM v8M Mainline instructions",
+ [HasV7Ops]>;
+
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureAcquireRelease,
- FeatureT2XtPk]>;
+ [HasV7Ops, FeatureAcquireRelease]>;
+
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
-def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions",
[HasV8_1aOps]>;
-def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true",
- "Support ARM v8M Mainline instructions",
- [HasV7Ops]>;
//===----------------------------------------------------------------------===//
@@ -342,7 +429,9 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", []>;
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
- "Qualcomm ARM processors", []>;
+ "Qualcomm Krait processors", []>;
+def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
+ "Qualcomm Kryo processors", []>;
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
@@ -361,11 +450,17 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
+
//===----------------------------------------------------------------------===//
-// ARM schedules.
+// ARM Helper classes.
//
-include "ARMSchedule.td"
+class Architecture<string fname, string aname, list<SubtargetFeature> features>
+ : SubtargetFeature<fname, "ARMArch", aname,
+ !strconcat(aname, " architecture"), features>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
//===----------------------------------------------------------------------===//
@@ -393,8 +488,7 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
- FeatureDSP,
- FeatureT2XtPk]>;
+ FeatureDSP]>;
def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
@@ -415,31 +509,37 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
FeatureNEON,
FeatureDB,
FeatureDSP,
- FeatureAClass,
- FeatureT2XtPk]>;
+ FeatureAClass]>;
+
+def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureAClass]>;
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
- FeatureHWDiv,
- FeatureRClass,
- FeatureT2XtPk]>;
+ FeatureHWDivThumb,
+ FeatureRClass]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass]>;
def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass,
- FeatureDSP,
- FeatureT2XtPk]>;
+ FeatureDSP]>;
def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
FeatureAClass,
@@ -481,9 +581,6 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
- FeatureHWDiv,
- FeatureHWDivARM,
- FeatureT2XtPk,
FeatureDSP,
FeatureCRC,
FeatureMP,
@@ -495,7 +592,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
[HasV8MBaselineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureV7Clrex,
Feature8MSecExt,
FeatureAcquireRelease,
@@ -505,7 +602,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
[HasV8MMainlineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
Feature8MSecExt,
FeatureAcquireRelease,
FeatureMClass]>;
@@ -520,11 +617,20 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
//===----------------------------------------------------------------------===//
+// ARM schedules.
+//===----------------------------------------------------------------------===//
+//
+include "ARMSchedule.td"
+
+//===----------------------------------------------------------------------===//
// ARM processors
//
// Dummy CPU, used to target architectures
-def : ProcNoItin<"generic", []>;
+def : ProcessorModel<"generic", CortexA8Model, []>;
+
+// FIXME: Several processors below are not using their own scheduler
+// model, but one of similar/previous processor. These should be fixed.
def : ProcNoItin<"arm8", [ARMv4]>;
def : ProcNoItin<"arm810", [ARMv4]>;
@@ -569,6 +675,7 @@ def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>;
def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>;
def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>;
def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>;
def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz,
FeatureVFP2,
@@ -584,7 +691,6 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-// FIXME: A5 has currently the same Schedule model as A8
def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
FeatureHasRetAddrStack,
FeatureTrustZone,
@@ -603,8 +709,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureVMLxForwarding,
FeatureMP,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureVirtualization]>;
def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
@@ -630,19 +734,15 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
FeatureCheckVLDnAlign,
FeatureMP]>;
-// FIXME: A12 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureVMLxForwarding,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization,
FeatureMP]>;
-// FIXME: A15 has currently the same Schedule model as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureDontWidenVMOVS,
FeatureHasRetAddrStack,
@@ -651,26 +751,19 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureVFP4,
FeatureMP,
FeatureCheckVLDnAlign,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
-// FIXME: A17 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureMP,
FeatureVMLxForwarding,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
-// FIXME: krait has currently the same Schedule model as A9
-// FIXME: krait has currently the same features as A9 plus VFP4 and hardware
-// division features.
+// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv
def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureHasRetAddrStack,
FeatureMuxedUnits,
@@ -679,7 +772,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureVFP4,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM]>;
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
@@ -687,7 +780,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -700,12 +793,10 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureSlowVGETLNi32,
FeatureSlowVDUP32]>;
-// FIXME: R4 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureAvoidPartialCPSR]>;
-// FIXME: R4F has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureSlowFPBrcc,
@@ -714,7 +805,6 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureD16,
FeatureAvoidPartialCPSR]>;
-// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureHasRetAddrStack,
FeatureVFP3,
@@ -724,7 +814,6 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;
-// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureHasRetAddrStack,
FeatureVFP3,
@@ -747,63 +836,80 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;
-def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
-def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
+def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
+ ProcM3,
+ FeatureHasNoBranchPredictor]>;
-def : ProcNoItin<"cortex-m4", [ARMv7em,
+def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
+ ProcM3,
+ FeatureHasNoBranchPredictor]>;
+
+def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
- FeatureD16]>;
+ FeatureD16,
+ FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
FeatureFPARMv8,
FeatureD16]>;
+def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
+ FeatureNoMovt]>;
+
+def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
+ FeatureDSP,
+ FeatureFPARMv8,
+ FeatureD16,
+ FeatureVFPOnlySP,
+ FeatureHasNoBranchPredictor]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
-def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
- FeatureHWDiv,
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
- FeatureFPAO]>;
+ FeatureFPAO,
+ FeatureAvoidPartialCPSR,
+ FeatureCheapPredicableCPSR]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
-// Cyclone is very similar to swift
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -812,19 +918,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing]>;
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
@@ -837,7 +949,7 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
//===----------------------------------------------------------------------===//
include "ARMRegisterInfo.td"
-
+include "ARMRegisterBanks.td"
include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
@@ -845,7 +957,6 @@ include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
include "ARMInstrInfo.td"
-
def ARMInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
@@ -866,7 +977,7 @@ def ARMAsmParserVariant : AsmParserVariant {
}
def ARM : Target {
- // Pull in Instruction Info:
+ // Pull in Instruction Info.
let InstructionSet = ARMInstrInfo;
let AssemblyWriters = [ARMAsmWriter];
let AssemblyParserVariants = [ARMAsmParserVariant];
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 95db35c..582153d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,6 +23,8 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -43,9 +45,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
@@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
ATS.finishAttributeSection();
}
-static bool isV8M(const ARMSubtarget *Subtarget) {
- // Note that v8M Baseline is a subset of v6T2!
- return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) ||
- Subtarget->hasV8MMainlineOps();
-}
-
//===----------------------------------------------------------------------===//
// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
// FIXME:
@@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
- const ARMSubtarget *Subtarget) {
- if (CPU == "xscale")
- return ARMBuildAttrs::v5TEJ;
-
- if (Subtarget->hasV8Ops()) {
- if (Subtarget->isRClass())
- return ARMBuildAttrs::v8_R;
- return ARMBuildAttrs::v8_A;
- } else if (Subtarget->hasV8MMainlineOps())
- return ARMBuildAttrs::v8_M_Main;
- else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasDSP())
- return ARMBuildAttrs::v7E_M;
- return ARMBuildAttrs::v7;
- } else if (Subtarget->hasV6T2Ops())
- return ARMBuildAttrs::v6T2;
- else if (Subtarget->hasV8MBaselineOps())
- return ARMBuildAttrs::v8_M_Base;
- else if (Subtarget->hasV6MOps())
- return ARMBuildAttrs::v6S_M;
- else if (Subtarget->hasV6Ops())
- return ARMBuildAttrs::v6;
- else if (Subtarget->hasV5TEOps())
- return ARMBuildAttrs::v5TE;
- else if (Subtarget->hasV5TOps())
- return ARMBuildAttrs::v5T;
- else if (Subtarget->hasV4TOps())
- return ARMBuildAttrs::v4T;
- else
- return ARMBuildAttrs::v4;
-}
-
// Returns true if all functions have the same function attribute value.
// It also returns true when the module has no functions.
static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
@@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() {
static_cast<const ARMBaseTargetMachine &>(TM);
const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
- const std::string &CPUString = STI.getCPUString();
-
- if (!StringRef(CPUString).startswith("generic")) {
- // FIXME: remove krait check when GNU tools support krait cpu
- if (STI.isKrait()) {
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
- // We consider krait as a "cortex-a9" + hwdiv CPU
- // Enable hwdiv through ".arch_extension idiv"
- if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
- } else
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
- }
-
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
-
- // Tag_CPU_arch_profile must have the default value of 0 when "Architecture
- // profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (STI.hasV7Ops() || isV8M(&STI)) {
- if (STI.isAClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- } else if (STI.isRClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::RealTimeProfile);
- } else if (STI.isMClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::MicroControllerProfile);
- }
- }
-
- ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
- STI.hasARMOps() ? ARMBuildAttrs::Allowed
- : ARMBuildAttrs::Not_Allowed);
- if (isV8M(&STI)) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumbDerived);
- } else if (STI.isThumb1Only()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
- } else if (STI.hasThumb2()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- }
-
- if (STI.hasNEON()) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (STI.hasFPARMv8()) {
- if (STI.hasCrypto())
- ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
- else
- ATS.emitFPU(ARM::FK_NEON_FP_ARMV8);
- } else if (STI.hasVFP4())
- ATS.emitFPU(ARM::FK_NEON_VFPV4);
- else
- ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
- // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
- if (STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
- ARMBuildAttrs::AllowNeonARMv8);
- } else {
- if (STI.hasFPARMv8())
- // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
- // FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
- : ARM::FK_FP_ARMV8);
- else if (STI.hasVFP4())
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
- : ARM::FK_VFPV4);
- else if (STI.hasVFP3())
- ATS.emitFPU(STI.hasD16()
- // +d16
- ? (STI.isFPOnlySP()
- ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
- : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
- // -d16
- : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
- else if (STI.hasVFP2())
- ATS.emitFPU(ARM::FK_VFPV2);
- }
+ // Emit build attributes for the available hardware.
+ ATS.emitTargetAttributes(STI);
// RW data addressing.
if (isPositionIndependent()) {
@@ -844,34 +724,17 @@ void ARMAsmPrinter::emitAttributes() {
ARMBuildAttrs::Allowed);
else
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::AllowIEE754);
-
- if (STI.allowsUnalignedMem())
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Allowed);
- else
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Not_Allowed);
+ ARMBuildAttrs::AllowIEEE754);
// FIXME: add more flags to ARMBuildAttributes.h
// 8-bytes alignment stuff.
ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
- // ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.isFPOnlySP())
- ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
- ARMBuildAttrs::HardFPSinglePrecision);
-
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
- // FIXME: Should we signal R9 usage?
-
- if (STI.hasFP16())
- ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
-
// FIXME: To support emitting this build attribute as GCC does, the
// -mfp16-format option and associated plumbing must be
// supported. For now the __fp16 type is exposed by default, so this
@@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (STI.hasMPExtension())
- ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
-
- // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
- // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
- // It is not possible to produce DisallowDIV: if hwdiv is present in the base
- // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
- // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
- // otherwise, the default value (AllowDIVIfExists) applies.
- if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
-
- if (STI.hasDSP() && isV8M(&STI))
- ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
-
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
@@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() {
else
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
ARMBuildAttrs::R9IsGPR);
-
- if (STI.hasTrustZone() && STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZVirtualization);
- else if (STI.hasTrustZone())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZ);
- else if (STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowVirtualization);
}
//===----------------------------------------------------------------------===//
@@ -1142,6 +980,11 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
+ // ARM mode tables.
+ EmitAlignment(2);
+
+ // Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@@ -1255,11 +1098,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
StartOp = 2; NumOffset = 2;
+ LLVM_FALLTHROUGH;
case ARM::STMDB_UPD:
case ARM::t2STMDB_UPD:
case ARM::VSTMDDB_UPD:
@@ -1291,7 +1135,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
int64_t Offset = 0;
switch (Opc) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::MOVr:
case ARM::tMOVr:
@@ -1346,11 +1190,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
}
} else if (DstReg == ARM::SP) {
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
else {
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
}
@@ -1661,6 +1505,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::CONSTPOOL_ENTRY: {
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
+
/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 70a3246..3cf5950 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,34 +11,58 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -168,9 +192,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
@@ -180,17 +203,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
.addReg(OffReg)
.addReg(0)
.addImm(SOOpc)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
} else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
break;
}
case ARMII::AddrMode3 : {
@@ -202,17 +223,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
break;
}
}
@@ -306,7 +325,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Walk backwards from the end of the basic block until the branch is
// analyzed or we give up.
while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
-
// Flag to be raised on unanalyzeable instructions. This is useful in cases
// where we want to clean up on the end of the basic block before we bail
// out.
@@ -381,7 +399,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-
unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
@@ -433,20 +450,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
if (isThumb)
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
@@ -537,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate(
return Found;
}
-static bool isCPSRDefined(const MachineInstr *MI) {
- for (const auto &MO : MI->operands())
+bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
+ for (const auto &MO : MI.operands())
if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
+bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Offset = MI.getOperand(Op + 1);
+ return Offset.getReg() != 0;
+}
+
+// Load with negative register offset requires additional 1cyc and +I unit
+// for Cortex A57
+bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Offset = MI.getOperand(Op + 1);
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ assert(Opc.isImm());
+ assert(Offset.isReg());
+ int64_t OpcImm = Opc.getImm();
+
+ bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
+ return (isSub && Offset.getReg() != 0);
+}
+
+bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ unsigned OffImm = Opc.getImm();
+ return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
+}
+
+// Load, scaled register offset, not plus LSL2
+bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ unsigned OffImm = Opc.getImm();
+
+ bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
+ bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
+ return !SimpleScaled;
+}
+
+// Minus reg for ldstso addr mode
+bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
+ unsigned Op) const {
+ unsigned OffImm = MI.getOperand(Op + 2).getImm();
+ return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+}
+
+// Load, scaled register offset
+bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
+ unsigned Op) const {
+ unsigned OffImm = MI.getOperand(Op + 2).getImm();
+ return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
+}
+
static bool isEligibleForITBlock(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return true;
@@ -569,14 +645,14 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
case ARM::tSUBi3: // SUB (immediate) T1
case ARM::tSUBi8: // SUB (immediate) T2
case ARM::tSUBrr: // SUB (register) T1
- return !isCPSRDefined(MI);
+ return !ARMBaseInstrInfo::isCPSRDefined(*MI);
}
}
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
-bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
+bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
if (!MI.isPredicable())
return false;
@@ -586,22 +662,25 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
if (!isEligibleForITBlock(&MI))
return false;
- ARMFunctionInfo *AFI =
+ const ARMFunctionInfo *AFI =
MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
+ // In their ARM encoding, they can't be encoded in a conditional form.
+ if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
+
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(&MI);
- } else { // non-Thumb
- if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
- return false;
}
return true;
}
namespace llvm {
-template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
+
+template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
@@ -614,7 +693,8 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
// all definitions of CPSR are dead
return true;
}
-}
+
+} // end namespace llvm
/// GetInstSize - Return the size of the specified MachineInstr.
///
@@ -698,9 +778,8 @@ void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
if (Subtarget.isMClass())
MIB.addImm(0x800);
- AddDefaultPred(MIB);
-
- MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+ MIB.add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
@@ -718,11 +797,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
else
MIB.addImm(8);
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
-
- AddDefaultPred(MIB);
-
- MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -733,8 +810,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
return;
}
@@ -758,7 +837,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::VORRq)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
return;
}
@@ -845,10 +924,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
- Mov = AddDefaultPred(Mov);
+ Mov = Mov.add(predOps(ARMCC::AL));
// MOVr can set CC.
if (Opc == ARM::MOVr)
- Mov = AddDefaultCC(Mov);
+ Mov = Mov.add(condCodeOp());
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
@@ -883,38 +962,47 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), Align);
- switch (RC->getSize()) {
+ switch (TRI->getSpillSize(*RC)) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
- MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
-
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI).addMemOperand(MMO));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
}
@@ -925,15 +1013,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
@@ -942,15 +1033,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -963,15 +1056,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -982,10 +1077,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -1065,22 +1160,31 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
- switch (RC->getSize()) {
+ switch (TRI->getSpillSize(*RC)) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB;
@@ -1088,14 +1192,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
- MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
-
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
// Fallback to LDM instruction, which has existed since the dawn of
// time.
- MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
- .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
@@ -1108,13 +1213,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
@@ -1122,14 +1230,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1142,14 +1252,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1162,10 +1274,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1248,7 +1360,7 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
: isThumb1 ? ARM::tLDMIA_UPD
: ARM::LDMIA_UPD))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(1));
} else {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
}
@@ -1257,17 +1369,17 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
: isThumb1 ? ARM::tSTMIA_UPD
: ARM::STMIA_UPD))
- .addOperand(MI->getOperand(0));
+ .add(MI->getOperand(0));
} else {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
}
- AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
- AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+ LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL));
+ STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL));
// Sort the scratch registers into ascending order.
const TargetRegisterInfo &TRI = getRegisterInfo();
- llvm::SmallVector<unsigned, 6> ScratchRegs;
+ SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
std::sort(ScratchRegs.begin(), ScratchRegs.end(),
@@ -1285,7 +1397,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
BB->erase(MI);
}
-
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
@@ -1346,7 +1457,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(ARM::VMOVD));
MI.getOperand(0).setReg(DstRegD);
MI.getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
@@ -1735,39 +1846,63 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
}
}
-
- // Attempt to estimate the relative costs of predication versus branching.
- // Here we scale up each component of UnpredCost to avoid precision issue when
- // scaling NumCycles by Probability.
- const unsigned ScalingUpFactor = 1024;
- unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
- UnpredCost += ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
-
- return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
+ return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
+ MBB, 0, 0, Probability);
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB,
+isProfitableToIfCvt(MachineBasicBlock &TBB,
unsigned TCycles, unsigned TExtra,
- MachineBasicBlock &FMBB,
+ MachineBasicBlock &FBB,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
- if (!TCycles || !FCycles)
+ if (!TCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
const unsigned ScalingUpFactor = 1024;
- unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
- unsigned FUnpredCost =
+
+ unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
+ unsigned UnpredCost;
+ if (!Subtarget.hasBranchPredictor()) {
+ // When we don't have a branch predictor it's always cheaper to not take a
+ // branch than take it, so we have to take that into account.
+ unsigned NotTakenBranchCost = 1;
+ unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
+ unsigned TUnpredCycles, FUnpredCycles;
+ if (!FCycles) {
+ // Triangle: TBB is the fallthrough
+ TUnpredCycles = TCycles + NotTakenBranchCost;
+ FUnpredCycles = TakenBranchCost;
+ } else {
+ // Diamond: TBB is the block that is branched to, FBB is the fallthrough
+ TUnpredCycles = TCycles + TakenBranchCost;
+ FUnpredCycles = FCycles + NotTakenBranchCost;
+ // The branch at the end of FBB will disappear when it's predicated, so
+ // discount it from PredCost.
+ PredCost -= 1 * ScalingUpFactor;
+ }
+ // The total cost is the cost of each path scaled by their probabilites
+ unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
+ unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
+ UnpredCost = TUnpredCost + FUnpredCost;
+ // When predicating assume that the first IT can be folded away but later
+ // ones cost one cycle each
+ if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
+ PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
+ }
+ } else {
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
Probability.getCompl().scale(FCycles * ScalingUpFactor);
- unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1 * ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ }
- return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
+ return PredCost <= UnpredCost;
}
bool
@@ -1793,7 +1928,6 @@ ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}
-
unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
@@ -1920,25 +2054,25 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
- NewMI.addOperand(DefMI->getOperand(i));
+ NewMI.add(DefMI->getOperand(i));
unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
- NewMI.addOperand(MI.getOperand(4));
+ NewMI.add(MI.getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
- AddDefaultCC(NewMI);
+ NewMI.add(condCodeOp());
// The output register value when the predicate is false is an implicit
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
FalseReg.setImplicit();
- NewMI.addOperand(FalseReg);
+ NewMI.add(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// Update SeenMIs set: register newly created MI and erase removed DefMI.
@@ -1983,6 +2117,16 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
+ {ARM::tADDSi3, ARM::tADDi3},
+ {ARM::tADDSi8, ARM::tADDi8},
+ {ARM::tADDSrr, ARM::tADDrr},
+ {ARM::tADCS, ARM::tADC},
+
+ {ARM::tSUBSi3, ARM::tSUBi3},
+ {ARM::tSUBSi8, ARM::tSUBi8},
+ {ARM::tSUBSrr, ARM::tSUBrr},
+ {ARM::tSBCS, ARM::tSBC},
+
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
{ARM::t2ADDSrs, ARM::t2ADDrs},
@@ -2011,9 +2155,10 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
- .addReg(BaseReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
return;
}
@@ -2033,9 +2178,11 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
// Build the new ADD / SUB.
unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
BaseReg = DestReg;
}
}
@@ -2154,7 +2301,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
for (int i = RegList.size() - 1; i >= 0; --i)
- MIB.addOperand(RegList[i]);
+ MIB.add(RegList[i]);
return true;
}
@@ -2213,33 +2360,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned NumBits = 0;
unsigned Scale = 1;
switch (AddrMode) {
- case ARMII::AddrMode_i12: {
+ case ARMII::AddrMode_i12:
ImmIdx = FrameRegIdx + 1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = 12;
break;
- }
- case ARMII::AddrMode2: {
+ case ARMII::AddrMode2:
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 12;
break;
- }
- case ARMII::AddrMode3: {
+ case ARMII::AddrMode3:
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 8;
break;
- }
case ARMII::AddrMode4:
case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
- case ARMII::AddrMode5: {
+ case ARMII::AddrMode5:
ImmIdx = FrameRegIdx+1;
InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
@@ -2247,7 +2391,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
NumBits = 8;
Scale = 4;
break;
- }
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -2401,6 +2544,63 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
return false;
}
+static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::tLSLri:
+ case ARM::tLSRri:
+ case ARM::tLSLrr:
+ case ARM::tLSRrr:
+ case ARM::tSUBrr:
+ case ARM::tADDrr:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tMUL:
+ IsThumb1 = true;
+ LLVM_FALLTHROUGH;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri:
+ case ARM::t2LSRri:
+ case ARM::t2LSRrr:
+ case ARM::t2LSLri:
+ case ARM::t2LSLrr:
+ return true;
+ }
+}
+
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
@@ -2462,6 +2662,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return false;
}
+ bool IsThumb1 = false;
+ if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
+ return false;
+
+ // We also want to do this peephole for cases like this: if (a*b == 0),
+ // and optimise away the CMP instruction from the generated code sequence:
+ // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
+ // resulting from the select instruction, but these MOVS instructions for
+ // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
+ // However, if we only have MOVS instructions in between the CMP and the
+ // other instruction (the MULS in this example), then the CPSR is dead so we
+ // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
+ // reordering and then continue the analysis hoping we can eliminate the
+ // CMP. This peephole works on the vregs, so is still in SSA form. As a
+ // consequence, the movs won't redefine/kill the MUL operands which would
+ // make this reordering illegal.
+ if (MI && IsThumb1) {
+ --I;
+ bool CanReorder = true;
+ const bool HasStmts = I != E;
+ for (; I != E; --I) {
+ if (I->getOpcode() != ARM::tMOVi8) {
+ CanReorder = false;
+ break;
+ }
+ }
+ if (HasStmts && CanReorder) {
+ MI = MI->removeFromParent();
+ E = CmpInstr;
+ CmpInstr.getParent()->insert(E, MI);
+ }
+ I = CmpInstr;
+ E = MI;
+ }
+
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2497,183 +2732,128 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (isPredicated(*MI))
return false;
- bool IsThumb1 = false;
- switch (MI->getOpcode()) {
- default: break;
- case ARM::tLSLri:
- case ARM::tLSRri:
- case ARM::tLSLrr:
- case ARM::tLSRrr:
- case ARM::tSUBrr:
- case ARM::tADDrr:
- case ARM::tADDi3:
- case ARM::tADDi8:
- case ARM::tSUBi3:
- case ARM::tSUBi8:
- IsThumb1 = true;
- LLVM_FALLTHROUGH;
- case ARM::RSBrr:
- case ARM::RSBri:
- case ARM::RSCrr:
- case ARM::RSCri:
- case ARM::ADDrr:
- case ARM::ADDri:
- case ARM::ADCrr:
- case ARM::ADCri:
- case ARM::SUBrr:
- case ARM::SUBri:
- case ARM::SBCrr:
- case ARM::SBCri:
- case ARM::t2RSBri:
- case ARM::t2ADDrr:
- case ARM::t2ADDri:
- case ARM::t2ADCrr:
- case ARM::t2ADCri:
- case ARM::t2SUBrr:
- case ARM::t2SUBri:
- case ARM::t2SBCrr:
- case ARM::t2SBCri:
- case ARM::ANDrr:
- case ARM::ANDri:
- case ARM::t2ANDrr:
- case ARM::t2ANDri:
- case ARM::ORRrr:
- case ARM::ORRri:
- case ARM::t2ORRrr:
- case ARM::t2ORRri:
- case ARM::EORrr:
- case ARM::EORri:
- case ARM::t2EORrr:
- case ARM::t2EORri:
- case ARM::t2LSRri:
- case ARM::t2LSRrr:
- case ARM::t2LSLri:
- case ARM::t2LSLrr: {
- // Scan forward for the use of CPSR
- // When checking against MI: if it's a conditional code that requires
- // checking of the V bit or C bit, then this is not safe to do.
- // It is safe to remove CmpInstr if CPSR is redefined or killed.
- // If we are done with the basic block, we need to check whether CPSR is
- // live-out.
- SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
- OperandsToUpdate;
- bool isSafe = false;
- I = CmpInstr;
- E = CmpInstr.getParent()->end();
- while (!isSafe && ++I != E) {
- const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands();
- !isSafe && IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
- isSafe = true;
- break;
- }
- if (!MO.isReg() || MO.getReg() != ARM::CPSR)
- continue;
- if (MO.isDef()) {
- isSafe = true;
- break;
- }
- // Condition code is after the operand before CPSR except for VSELs.
- ARMCC::CondCodes CC;
- bool IsInstrVSel = true;
- switch (Instr.getOpcode()) {
- default:
- IsInstrVSel = false;
- CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
- break;
- case ARM::VSELEQD:
- case ARM::VSELEQS:
- CC = ARMCC::EQ;
- break;
- case ARM::VSELGTD:
- case ARM::VSELGTS:
- CC = ARMCC::GT;
- break;
- case ARM::VSELGED:
- case ARM::VSELGES:
- CC = ARMCC::GE;
- break;
- case ARM::VSELVSS:
- case ARM::VSELVSD:
- CC = ARMCC::VS;
- break;
- }
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code that requires
+ // checking of the V bit or C bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr.getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
- if (Sub) {
- ARMCC::CondCodes NewCC = getSwappedCondition(CC);
- if (NewCC == ARMCC::AL)
- return false;
- // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
- // on CMP needs to be updated to be based on SUB.
- // Push the condition code operands to OperandsToUpdate.
- // If it is safe to remove CmpInstr, the condition code of these
- // operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg) {
- // VSel doesn't support condition code update.
- if (IsInstrVSel)
- return false;
- OperandsToUpdate.push_back(
- std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
- }
- } else {
- // No Sub, so this is x = <op> y, z; cmp x, 0.
- switch (CC) {
- case ARMCC::EQ: // Z
- case ARMCC::NE: // Z
- case ARMCC::MI: // N
- case ARMCC::PL: // N
- case ARMCC::AL: // none
- // CPSR can be used multiple times, we should continue.
- break;
- case ARMCC::HS: // C
- case ARMCC::LO: // C
- case ARMCC::VS: // V
- case ARMCC::VC: // V
- case ARMCC::HI: // C Z
- case ARMCC::LS: // C Z
- case ARMCC::GE: // N V
- case ARMCC::LT: // N V
- case ARMCC::GT: // Z N V
- case ARMCC::LE: // Z N V
- // The instruction uses the V bit or C bit which is not safe.
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
return false;
- }
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
- }
- }
-
- // If CPSR is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if (!isSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM::CPSR))
+ } else {
+ // No Sub, so this is x = <op> y, z; cmp x, 0.
+ switch (CC) {
+ case ARMCC::EQ: // Z
+ case ARMCC::NE: // Z
+ case ARMCC::MI: // N
+ case ARMCC::PL: // N
+ case ARMCC::AL: // none
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::HS: // C
+ case ARMCC::LO: // C
+ case ARMCC::VS: // V
+ case ARMCC::VC: // V
+ case ARMCC::HI: // C Z
+ case ARMCC::LS: // C Z
+ case ARMCC::GE: // N V
+ case ARMCC::LT: // N V
+ case ARMCC::GT: // Z N V
+ case ARMCC::LE: // Z N V
+ // The instruction uses the V bit or C bit which is not safe.
return false;
+ }
+ }
}
+ }
- // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
- // set CPSR so this is represented as an explicit output)
- if (!IsThumb1) {
- MI->getOperand(5).setReg(ARM::CPSR);
- MI->getOperand(5).setIsDef(true);
- }
- assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
- CmpInstr.eraseFromParent();
-
- // Modify the condition code of operands in OperandsToUpdate.
- // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
- // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
- for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
- OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
- return true;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr.getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
}
+
+ // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+ // set CPSR so this is represented as an explicit output)
+ if (!IsThumb1) {
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
}
-
- return false;
+ assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
+ CmpInstr.eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+
+ return true;
}
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
@@ -2728,7 +2908,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
switch (UseOpc) {
default: break;
case ARM::ADDrr:
- case ARM::SUBrr: {
+ case ARM::SUBrr:
if (UseOpc == ARM::SUBrr && Commute)
return false;
@@ -2744,9 +2924,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
break;
- }
case ARM::ORRrr:
- case ARM::EORrr: {
+ case ARM::EORrr:
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
@@ -2757,9 +2936,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
- }
case ARM::t2ADDrr:
- case ARM::t2SUBrr: {
+ case ARM::t2SUBrr:
if (UseOpc == ARM::t2SUBrr && Commute)
return false;
@@ -2775,9 +2953,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
break;
- }
case ARM::t2ORRrr:
- case ARM::t2EORrr: {
+ case ARM::t2EORrr:
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
@@ -2789,7 +2966,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
break;
}
- }
}
}
@@ -2797,11 +2973,12 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
bool isKill = UseMI.getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
- AddDefaultCC(
- AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
- get(NewUseOpc), NewReg)
- .addReg(Reg1, getKillRegState(isKill))
- .addImm(SOImmValV1)));
+ BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
+ NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
UseMI.setDesc(get(NewUseOpc));
UseMI.getOperand(1).setReg(NewReg);
UseMI.getOperand(1).setIsKill();
@@ -3261,6 +3438,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
+bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
+ unsigned BaseReg = MI.getOperand(0).getReg();
+ for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
+ const auto &Op = MI.getOperand(i);
+ if (Op.isReg() && Op.getReg() == BaseReg)
+ return true;
+ }
+ return false;
+}
+unsigned
+ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
+ // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops
+ // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
+ return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
+}
+
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -3413,7 +3606,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDMDB:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
- LdmBypass = 1;
+ LdmBypass = true;
DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
}
@@ -3888,12 +4081,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
- case ARM::t2LDRSHs: {
+ case ARM::t2LDRSHs:
// Thumb2 mode: lsl 0-3 only.
Latency -= 2;
break;
}
- }
}
if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
@@ -4032,7 +4224,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
+ !Subtarget.cheapPredicableCPSRDef())) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
return 1;
@@ -4061,7 +4254,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &MCID = MI.getDesc();
- if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
+ if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
+ !Subtarget.cheapPredicableCPSRDef()))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
@@ -4180,14 +4374,14 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
- MIB.addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
}
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- AddDefaultPred(MIB);
+ MIB.addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .add(predOps(ARMCC::AL));
}
bool
@@ -4222,6 +4416,7 @@ enum ARMExeDomain {
ExeVFP = 1,
ExeNEON = 2
};
+
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
@@ -4345,8 +4540,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
MI.setDesc(get(ARM::VORRd));
- AddDefaultPred(
- MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg));
+ MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .add(predOps(ARMCC::AL));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
@@ -4366,9 +4563,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
// Note that DSrc has been widened and the other lane may be undef, which
// contaminates the entire register.
MI.setDesc(get(ARM::VGETLNi32));
- AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
- .addReg(DReg, RegState::Undef)
- .addImm(Lane));
+ MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
// The old source should be an implicit use, otherwise we might think it
// was dead before here.
@@ -4398,8 +4596,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
MIB.addReg(DReg, RegState::Define)
.addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
.addReg(SrcReg)
- .addImm(Lane);
- AddDefaultPred(MIB);
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
// The narrower destination must be marked as set to keep previous chains
// in place.
@@ -4433,8 +4631,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
MI.setDesc(get(ARM::VDUPLN32d));
MIB.addReg(DDst, RegState::Define)
.addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
- .addImm(SrcLane);
- AddDefaultPred(MIB);
+ .addImm(SrcLane)
+ .add(predOps(ARMCC::AL));
// Neither the source or the destination are naturally represented any
// more, so add them in manually.
@@ -4470,10 +4668,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
CurUndef = !MI.readsRegister(CurReg, TRI);
- NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
-
- NewMIB.addImm(1);
- AddDefaultPred(NewMIB);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (SrcLane == DstLane)
NewMIB.addReg(SrcReg, RegState::Implicit);
@@ -4489,10 +4686,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
- MIB.addReg(CurReg, getUndefRegState(CurUndef));
-
- MIB.addImm(1);
- AddDefaultPred(MIB);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef))
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (SrcLane != DstLane)
MIB.addReg(SrcReg, RegState::Implicit);
@@ -4505,7 +4701,6 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
}
}
-
}
//===----------------------------------------------------------------------===//
@@ -4613,9 +4808,9 @@ void ARMBaseInstrInfo::breakPartialRegDependency(
// Insert the dependency-breaking FCONSTD before MI.
// 96 is the encoding of 0.5, but the actual value doesn't matter here.
- AddDefaultPred(
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
- .addImm(96));
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
+ .addImm(96)
+ .add(predOps(ARMCC::AL));
MI.addRegisterKilled(DReg, TRI, true);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b01d5c8..c52e572 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -17,16 +17,21 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CodeGen.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <array>
+#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "ARMGenInstrInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseRegisterInfo;
+
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
class ARMBaseInstrInfo : public ARMGenInstrInfo {
const ARMSubtarget &Subtarget;
@@ -100,13 +105,9 @@ public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
- virtual void getNoopForElfTarget(MCInst &NopInst) const {
- getNoopForMachoTarget(NopInst);
- }
-
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
- virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr &MI,
@@ -156,7 +157,25 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
+
+ // CPSR defined in instruction
+ static bool isCPSRDefined(const MachineInstr &MI);
+ bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const;
+ bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const;
+
+ // Load, scaled register offset
+ bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const;
+ // Load, scaled register offset, not plus LSL2
+ bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const;
+ // Minus reg for ldstso addr mode
+ bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const;
+ // Scaled register offset in address mode 2
+ bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const;
+ // Load multiple, base reg in list
+ bool isLDMBaseRegInList(const MachineInstr &MI) const;
+ // get LDM variable defs size
+ unsigned getLDMVariableDefsSize(const MachineInstr &MI) const;
/// GetInstSize - Returns the size of the specified MachineInstr.
///
@@ -399,27 +418,43 @@ public:
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isSwiftFastImmShift(const MachineInstr *MI) const;
-};
-static inline
-const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
- return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
-}
+ /// Returns predicate register associated with the given frame instruction.
+ unsigned getFramePred(const MachineInstr &MI) const {
+ assert(isFrameInstr(MI));
+ // Operands of ADJCALLSTACKDOWN/ADJCALLSTACKUP:
+ // - argument declared in the pattern:
+ // 0 - frame size
+ // 1 - arg of CALLSEQ_START/CALLSEQ_END
+ // 2 - predicate code (like ARMCC::AL)
+ // - added by predOps:
+ // 3 - predicate reg
+ return MI.getOperand(3).getReg();
+ }
+};
-static inline
-const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
+/// Get the operands corresponding to the given \p Pred value. By default, the
+/// predicate register is assumed to be 0 (no register), but you can pass in a
+/// \p PredReg if that is not the case.
+static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred,
+ unsigned PredReg = 0) {
+ return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
+ MachineOperand::CreateReg(PredReg, false)}};
}
-static inline
-const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
- bool isDead = false) {
- return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+/// Get the operand corresponding to the conditional code result. By default,
+/// this is 0 (no register).
+static inline MachineOperand condCodeOp(unsigned CCReg = 0) {
+ return MachineOperand::CreateReg(CCReg, false);
}
-static inline
-const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
+/// Get the operand corresponding to the conditional code result for Thumb1.
+/// This operand will always refer to CPSR and it will have the Define flag set.
+/// You can optionally set the Dead flag by means of \p isDead.
+static inline MachineOperand t1CondCodeOp(bool isDead = false) {
+ return MachineOperand::CreateReg(ARM::CPSR,
+ /*Define*/ true, /*Implicit*/ false,
+ /*Kill*/ false, isDead);
}
static inline
@@ -517,6 +552,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d995c63..370c0a7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -18,25 +18,35 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <utility>
#define DEBUG_TYPE "arm-register-info"
@@ -46,7 +56,7 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo()
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
@@ -107,7 +117,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (CC == CallingConv::GHC)
- // This is academic becase all GHC calls are (supposed to be) tail calls
+ // This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() &&
@@ -140,7 +150,6 @@ ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) con
return CSR_FPRegs_RegMask;
}
-
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -154,7 +163,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
// both or otherwise does not want to enable this optimization, the function
// should return NULL
if (CC == CallingConv::GHC)
- // This is academic becase all GHC calls are (supposed to be) tail calls
+ // This is academic because all GHC calls are (supposed to be) tail calls
return nullptr;
return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask
: CSR_AAPCS_ThisReturn_RegMask;
@@ -184,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned R = 0; R < 16; ++R)
markSuperRegs(Reserved, ARM::D16 + R);
}
- const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
- for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
- for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
- if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+ const TargetRegisterClass &RC = ARM::GPRPairRegClass;
+ for (unsigned Reg : RC)
+ for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI))
+ markSuperRegs(Reserved, Reg);
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
@@ -236,11 +246,18 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
- case ARM::tGPRRegClassID:
- return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::tGPRRegClassID: {
+ // hasFP ends up calling getMaxCallFrameComputed() which may not be
+ // available when getPressureLimit() is called as part of
+ // ScheduleDAGRRList.
+ bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed()
+ ? TFI->hasFP(MF) : true;
+ return 5 - HasFP;
+ }
case ARM::GPRRegClassID: {
- unsigned FP = TFI->hasFP(MF) ? 1 : 0;
- return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed()
+ ? TFI->hasFP(MF) : true;
+ return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0);
}
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
case ARM::DPRRegClassID:
@@ -299,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
- for (unsigned I = 0, E = Order.size(); I != E; ++I) {
- unsigned Reg = Order[I];
+ for (unsigned Reg : Order) {
if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
continue;
// Don't provide hints that are paired to a reserved register.
@@ -425,10 +441,11 @@ void ARMBaseRegisterInfo::emitLoadConstPool(
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx)
- .addImm(0).addImm(Pred).addReg(PredReg)
- .setMIFlags(MIFlags);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(Pred, PredReg))
+ .setMIFlags(MIFlags);
}
bool ARMBaseRegisterInfo::
@@ -474,26 +491,23 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
Scale = 4;
break;
}
- case ARMII::AddrMode2: {
+ case ARMII::AddrMode2:
ImmIdx = Idx+2;
InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs = -InstrOffs;
break;
- }
- case ARMII::AddrMode3: {
+ case ARMII::AddrMode3:
ImmIdx = Idx+2;
InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs = -InstrOffs;
break;
- }
- case ARMII::AddrModeT1_s: {
+ case ARMII::AddrModeT1_s:
ImmIdx = Idx+1;
InstrOffs = MI->getOperand(ImmIdx).getImm();
Scale = 4;
break;
- }
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -609,7 +623,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(AddDefaultPred(MIB));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -636,7 +650,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
assert(AFI->isThumb2Function());
Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
}
- assert (Done && "Unable to resolve frame index!");
+ assert(Done && "Unable to resolve frame index!");
(void)Done;
}
@@ -645,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
unsigned i = 0;
-
- while (!MI->getOperand(i).isFI()) {
- ++i;
- assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
- }
+ for (; !MI->getOperand(i).isFI(); ++i)
+ assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
// AddrMode4 and AddrMode6 cannot handle any offset.
if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
@@ -799,7 +810,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
if (!DstSubReg)
return true;
// Small registers don't frequently cause a problem, so we can coalesce them.
- if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32)
+ if (getRegSizeInBits(*NewRC) < 256 && getRegSizeInBits(*DstRC) < 256 &&
+ getRegSizeInBits(*SrcRC) < 256)
return true;
auto NewRCWeight =
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 330e153..2e91d9d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -15,24 +15,33 @@
#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdint>
#define GET_REGINFO_HEADER
#include "ARMGenRegisterInfo.inc"
namespace llvm {
+
/// Register allocation hints.
namespace ARMRI {
+
enum {
RegPairOdd = 1,
RegPairEven = 2
};
-}
+
+} // end namespace ARMRI
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case R0: case R1: case R2: case R3:
case R4: case R5: case R6: case R7:
@@ -48,6 +57,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case R8: case R9: case R10: case R11: case R12:
// iOS has this second area.
@@ -59,6 +69,7 @@ static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case D15: case D14: case D13: case D12:
case D11: case D10: case D9: case D8:
@@ -87,7 +98,7 @@ protected:
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
- unsigned BasePtr;
+ unsigned BasePtr = ARM::R6;
// Can be only subclassed.
explicit ARMBaseRegisterInfo();
@@ -198,4 +209,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
index 780544f..e0cb0aa 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
-#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
-using namespace llvm;
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cstdint>
namespace llvm {
@@ -44,31 +44,30 @@ struct BasicBlockInfo {
///
/// Because worst case padding is used, the computed offset of an aligned
/// block may not actually be aligned.
- unsigned Offset;
+ unsigned Offset = 0;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
+ unsigned Size = 0;
/// KnownBits - The number of low bits in Offset that are known to be
/// exact. The remaining bits of Offset are an upper bound.
- uint8_t KnownBits;
+ uint8_t KnownBits = 0;
/// Unalign - When non-zero, the block contains instructions (inline asm)
/// of unknown size. The real size may be smaller than Size bytes by a
/// multiple of 1 << Unalign.
- uint8_t Unalign;
+ uint8_t Unalign = 0;
/// PostAlign - When non-zero, the block terminator contains a .align
/// directive, so the end of the block is aligned to 1 << PostAlign
/// bytes.
- uint8_t PostAlign;
+ uint8_t PostAlign = 0;
- BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
- PostAlign(0) {}
+ BasicBlockInfo() = default;
/// Compute the number of known offset bits internally to this block.
/// This number should be used to predict worst case padding when
@@ -107,4 +106,4 @@ struct BasicBlockInfo {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 52c95b6..051827a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -17,8 +17,11 @@
#include "ARMBaseInstrInfo.h"
#include "ARMISelLowering.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -30,25 +33,61 @@ using namespace llvm;
ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
: CallLowering(&TLI) {}
-static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI,
+static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- EVT VT = TLI.getValueType(DL, T);
- if (!VT.isSimple() || !VT.isInteger() || VT.isVector())
+ if (T->isArrayTy())
+ return true;
+
+ if (T->isStructTy()) {
+ // For now we only allow homogeneous structs that we can manipulate with
+ // G_MERGE_VALUES and G_UNMERGE_VALUES
+ auto StructT = cast<StructType>(T);
+ for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i)
+ if (StructT->getElementType(i) != StructT->getElementType(0))
+ return false;
+ return true;
+ }
+
+ EVT VT = TLI.getValueType(DL, T, true);
+ if (!VT.isSimple() || VT.isVector() ||
+ !(VT.isInteger() || VT.isFloatingPoint()))
return false;
unsigned VTSize = VT.getSimpleVT().getSizeInBits();
- return VTSize == 8 || VTSize == 16 || VTSize == 32;
+
+ if (VTSize == 64)
+ // FIXME: Support i64 too
+ return VT.isFloatingPoint();
+
+ return VTSize == 1 || VTSize == 8 || VTSize == 16 || VTSize == 32;
}
namespace {
-struct FuncReturnHandler : public CallLowering::ValueHandler {
- FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB)
- : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+/// Helper class for values going out through an ABI boundary (used for handling
+/// function return values and call parameters).
+struct OutgoingValueHandler : public CallLowering::ValueHandler {
+ OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
- llvm_unreachable("Don't know how to get a stack address yet");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ LLT p0 = LLT::pointer(0, 32);
+ LLT s32 = LLT::scalar(32);
+ unsigned SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, ARM::SP);
+
+ unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+
+ unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+
+ MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ return AddrReg;
}
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
@@ -56,26 +95,123 @@ struct FuncReturnHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
- assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
-
- assert(VA.getLocInfo() != CCValAssign::SExt &&
- VA.getLocInfo() != CCValAssign::ZExt &&
- "ABI extensions not supported yet");
+ assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("Don't know how to assign a value to an address yet");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
+ /* Alignment */ 0);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
+ }
+
+ unsigned assignCustomValue(const CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs) override {
+ CCValAssign VA = VAs[0];
+ assert(VA.needsCustom() && "Value doesn't need custom handling");
+ assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ CCValAssign NextVA = VAs[1];
+ assert(NextVA.needsCustom() && "Value doesn't need custom handling");
+ assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
+
+ assert(VA.getValNo() == NextVA.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VA.isRegLoc() && "Value should be in reg");
+ assert(NextVA.isRegLoc() && "Value should be in reg");
+
+ unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+ MIRBuilder.buildUnmerge(NewRegs, Arg.Reg);
+
+ bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
+ if (!IsLittle)
+ std::swap(NewRegs[0], NewRegs[1]);
+
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+
+ return 1;
+ }
+
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info, CCState &State) override {
+ if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State))
+ return true;
+
+ StackSize =
+ std::max(StackSize, static_cast<uint64_t>(State.getNextStackOffset()));
+ return false;
}
MachineInstrBuilder &MIB;
+ uint64_t StackSize;
};
} // End anonymous namespace.
+void ARMCallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ MachineFunction &MF, const SplitArgTy &PerformArgSplit) const {
+ const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+ const DataLayout &DL = MF.getDataLayout();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function *F = MF.getFunction();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+ if (SplitVTs.size() == 1) {
+ // Even if there is no splitting to do, we still want to replace the
+ // original type (e.g. pointer type -> integer).
+ auto Flags = OrigArg.Flags;
+ unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
+ Flags.setOrigAlign(OriginalAlignment);
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags,
+ OrigArg.IsFixed);
+ return;
+ }
+
+ unsigned FirstRegIdx = SplitArgs.size();
+ for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
+ EVT SplitVT = SplitVTs[i];
+ Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
+ auto Flags = OrigArg.Flags;
+
+ unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ bool NeedsConsecutiveRegisters =
+ TLI.functionArgumentNeedsConsecutiveRegisters(
+ SplitTy, F->getCallingConv(), F->isVarArg());
+ if (NeedsConsecutiveRegisters) {
+ Flags.setInConsecutiveRegs();
+ if (i == e - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+
+ SplitArgs.push_back(
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
+ SplitTy, Flags, OrigArg.IsFixed});
+ }
+
+ for (unsigned i = 0; i < Offsets.size(); ++i)
+ PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
+}
+
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
@@ -93,21 +229,29 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
+ SmallVector<ArgInfo, 4> SplitVTs;
+ SmallVector<unsigned, 4> Regs;
+ ArgInfo RetInfo(VReg, Val->getType());
+ setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
+ Regs.push_back(Reg);
+ });
+
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, VReg);
+
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
- ArgInfo RetInfo(VReg, Val->getType());
- setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F);
-
- FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
- return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler);
+ OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
+ return handleAssignments(MIRBuilder, SplitVTs, RetHandler);
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
assert(!Val == !VReg && "Return value without a vreg");
- auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET));
+ auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL));
if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
return false;
@@ -117,13 +261,17 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
}
namespace {
-struct FormalArgHandler : public CallLowering::ValueHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : ValueHandler(MIRBuilder, MRI) {}
+/// Helper class for values coming in through an ABI boundary (used for handling
+/// formal arguments and call return values).
+struct IncomingValueHandler : public CallLowering::ValueHandler {
+ IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
- assert(Size == 4 && "Unsupported size");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
auto &MFI = MIRBuilder.getMF().getFrameInfo();
@@ -139,11 +287,30 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- assert(Size == 4 && "Unsupported size");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ if (VA.getLocInfo() == CCValAssign::SExt ||
+ VA.getLocInfo() == CCValAssign::ZExt) {
+ // If the value is zero- or sign-extended, its size becomes 4 bytes, so
+ // that's what we should load.
+ Size = 4;
+ assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
+
+ auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO);
+ MIRBuilder.buildTrunc(ValVReg, LoadVReg);
+ } else {
+ // If the value is not extended, a simple load will suffice.
+ buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+ }
+ }
+ void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
+ MachinePointerInfo &MPO) {
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
- MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ MPO, MachineMemOperand::MOLoad, Size, Alignment);
+ MIRBuilder.buildLoad(Val, Addr, *MMO);
}
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
@@ -151,12 +318,60 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
- assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+ assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- MIRBuilder.getMBB().addLiveIn(PhysReg);
+ // The necessary extensions are handled on the other side of the ABI
+ // boundary.
+ markPhysRegUsed(PhysReg);
MIRBuilder.buildCopy(ValVReg, PhysReg);
}
+
+ unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs) override {
+ CCValAssign VA = VAs[0];
+ assert(VA.needsCustom() && "Value doesn't need custom handling");
+ assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ CCValAssign NextVA = VAs[1];
+ assert(NextVA.needsCustom() && "Value doesn't need custom handling");
+ assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
+
+ assert(VA.getValNo() == NextVA.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VA.isRegLoc() && "Value should be in reg");
+ assert(NextVA.isRegLoc() && "Value should be in reg");
+
+ unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+
+ bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
+ if (!IsLittle)
+ std::swap(NewRegs[0], NewRegs[1]);
+
+ MIRBuilder.buildMerge(Arg.Reg, NewRegs);
+
+ return 1;
+ }
+
+ /// Marking a physical register as used is different between formal
+ /// parameters, where it's a basic block live-in, and call returns, where it's
+ /// an implicit-def of the call instruction.
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+};
+
+struct FormalArgHandler : public IncomingValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ }
};
} // End anonymous namespace
@@ -170,34 +385,150 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (F.isVarArg())
return false;
- auto DL = MIRBuilder.getMF().getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MBB = MIRBuilder.getMBB();
+ auto DL = MF.getDataLayout();
auto &TLI = *getTLI<ARMTargetLowering>();
- auto &Args = F.getArgumentList();
- unsigned ArgIdx = 0;
- for (auto &Arg : Args) {
- ArgIdx++;
- if (!isSupportedType(DL, TLI, Arg.getType()))
- return false;
+ auto Subtarget = TLI.getSubtarget();
+
+ if (Subtarget->isThumb())
+ return false;
- // FIXME: This check as well as ArgIdx are going away as soon as we support
- // loading values < 32 bits.
- if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
+ for (auto &Arg : F.args())
+ if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
- }
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
+ FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
+ AssignFn);
+
SmallVector<ArgInfo, 8> ArgInfos;
+ SmallVector<unsigned, 4> SplitRegs;
unsigned Idx = 0;
- for (auto &Arg : Args) {
+ for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[Idx], Arg.getType());
- setArgFlags(AInfo, Idx + 1, DL, F);
- ArgInfos.push_back(AInfo);
+ setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
+
+ SplitRegs.clear();
+
+ splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
+ SplitRegs.push_back(Reg);
+ });
+
+ if (!SplitRegs.empty())
+ MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
+
Idx++;
}
- FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());
- return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler);
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
+
+ return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
+}
+
+namespace {
+struct CallReturnHandler : public IncomingValueHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIB.addDef(PhysReg, RegState::Implicit);
+ }
+
+ MachineInstrBuilder MIB;
+};
+} // End anonymous namespace.
+
+bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv,
+ const MachineOperand &Callee,
+ const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const auto &TLI = *getTLI<ARMTargetLowering>();
+ const auto &DL = MF.getDataLayout();
+ const auto &STI = MF.getSubtarget();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (MF.getSubtarget<ARMSubtarget>().genLongCalls())
+ return false;
+
+ auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN);
+
+ // Create the call instruction so we can add the implicit uses of arg
+ // registers, but don't insert it yet.
+ auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask(
+ TRI->getCallPreservedMask(MF, CallConv));
+ if (Callee.isReg()) {
+ auto CalleeReg = Callee.getReg();
+ if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg))
+ MIB->getOperand(0).setReg(constrainOperandRegClass(
+ MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
+ *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0));
+ }
+
+ SmallVector<ArgInfo, 8> ArgInfos;
+ for (auto Arg : OrigArgs) {
+ if (!isSupportedType(DL, TLI, Arg.Ty))
+ return false;
+
+ if (!Arg.IsFixed)
+ return false;
+
+ SmallVector<unsigned, 8> Regs;
+ splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
+ Regs.push_back(Reg);
+ });
+
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, Arg.Reg);
+ }
+
+ auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
+ if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+ return false;
+
+ // Now we can add the actual call instruction to the correct basic block.
+ MIRBuilder.insertInstr(MIB);
+
+ if (!OrigRet.Ty->isVoidTy()) {
+ if (!isSupportedType(DL, TLI, OrigRet.Ty))
+ return false;
+
+ ArgInfos.clear();
+ SmallVector<unsigned, 8> SplitRegs;
+ splitToValueTypes(OrigRet, ArgInfos, MF,
+ [&](unsigned Reg, uint64_t Offset) {
+ SplitRegs.push_back(Reg);
+ });
+
+ auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
+ CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
+ return false;
+
+ if (!SplitRegs.empty()) {
+ // We have split the value and allocated each individual piece, now build
+ // it up again.
+ MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs);
+ }
+ }
+
+ // We now know the size of the stack - update the ADJCALLSTACKDOWN
+ // accordingly.
+ CallSeqStart.addImm(ArgHandler.StackSize).addImm(0).add(predOps(ARMCC::AL));
+
+ MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
+ .addImm(ArgHandler.StackSize)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+
+ return true;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
index 6a1b886..f5a6872 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -34,9 +34,22 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const override;
+
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
unsigned VReg, MachineInstrBuilder &Ret) const;
+
+ typedef std::function<void(unsigned Reg, uint64_t Offset)> SplitArgTy;
+
+ /// Split an argument into one or more arguments that the CC lowering can cope
+ /// with (e.g. replace pointers with integers).
+ void splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ MachineFunction &MF,
+ const SplitArgTy &PerformArgSplit) const;
};
} // End of namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 7a7b7fe..bc7afdb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
-def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
- (sequence "R%u", 12, 1),
- (sequence "D%u", 31, 0))>;
+def CSR_iOS_TLSCall
+ : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12),
+ (sequence "D%u", 31, 0))>;
// C++ TLS access function saves all registers except SP. Try to match
// the order of CSRs in CSR_iOS.
diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
index 64f187d..e145d0a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -8,7 +8,15 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <vector>
+
using namespace llvm;
namespace llvm {
@@ -69,4 +77,4 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
return BBInfo;
}
-} // end namespace
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index be1a37e..667337d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -14,30 +14,50 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "arm-cp-islands"
+#define ARM_CP_ISLANDS_OPT_NAME \
+ "ARM constant island placement and branch shortening pass"
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
@@ -49,7 +69,6 @@ STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
-
static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
@@ -64,6 +83,7 @@ static cl::opt<bool> SynthesizeThumb1TBB(
"equivalent to the TBB/TBH instructions"));
namespace {
+
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
/// inside a function. To do this, it completely ignores the normal LLVM
@@ -76,7 +96,6 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
-
std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
@@ -110,12 +129,14 @@ namespace {
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
+
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
: MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
KnownAlignment(false) {
HighWaterMark = CPEMI->getParent();
}
+
/// getMaxDisp - Returns the maximum displacement supported by MI.
/// Correct for unknown alignment.
/// Conservatively subtract 2 bytes to handle weird alignment effects.
@@ -135,6 +156,7 @@ namespace {
MachineInstr *CPEMI;
unsigned CPI;
unsigned RefCount;
+
CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
: CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
};
@@ -148,7 +170,7 @@ namespace {
/// The first half of CPEntries contains generic constants, the second half
/// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
/// which vector it will be in here.
- std::vector<std::vector<CPEntry> > CPEntries;
+ std::vector<std::vector<CPEntry>> CPEntries;
/// Maps a JT index to the offset in CPEntries containing copies of that
/// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
@@ -167,6 +189,7 @@ namespace {
unsigned MaxDisp : 31;
bool isCond : 1;
unsigned UncondBr;
+
ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
@@ -195,8 +218,10 @@ namespace {
bool isThumb1;
bool isThumb2;
bool isPositionIndependentOrROPI;
+
public:
static char ID;
+
ARMConstantIslands() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -207,7 +232,7 @@ namespace {
}
StringRef getPassName() const override {
- return "ARM constant island placement and branch shortening pass";
+ return ARM_CP_ISLANDS_OPT_NAME;
}
private:
@@ -264,8 +289,10 @@ namespace {
U.getMaxDisp(), U.NegOk, U.IsSoImm);
}
};
+
char ARMConstantIslands::ID = 0;
-}
+
+} // end anonymous namespace
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
@@ -295,8 +322,9 @@ void ARMConstantIslands::verify() {
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void ARMConstantIslands::dumpBBs() {
+LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
DEBUG({
for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
const BasicBlockInfo &BBI = BBInfo[J];
@@ -308,12 +336,7 @@ void ARMConstantIslands::dumpBBs() {
}
});
}
-
-/// createARMConstantIslandPass - returns an instance of the constpool
-/// island pass.
-FunctionPass *llvm::createARMConstantIslandPass() {
- return new ARMConstantIslands();
-}
+#endif
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
@@ -782,6 +805,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
case ARM::LDRcp:
case ARM::t2LDRpci:
case ARM::t2LDRHpci:
+ case ARM::t2LDRBpci:
Bits = 12; // +-offset_12
NegOk = true;
break;
@@ -873,7 +897,6 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
WaterList.insert(IP, NewBB);
}
-
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
@@ -897,8 +920,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
if (!isThumb)
BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
else
- BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
+ .addMBB(NewBB)
+ .add(predOps(ARMCC::AL));
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
@@ -1296,8 +1320,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (!isThumb)
BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr))
+ .addMBB(NewMBB)
+ .add(predOps(ARMCC::AL));
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
@@ -1477,7 +1502,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// add it to the island.
U.HighWaterMark = NewIsland;
U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
- .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
+ .addImm(ID)
+ .add(CPEMI->getOperand(1))
+ .addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
@@ -1681,8 +1708,9 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
Br.MI = &MBB->back();
BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
if (isThumb)
- BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
+ .addMBB(DestBB)
+ .add(predOps(ARMCC::AL));
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
@@ -1709,8 +1737,14 @@ bool ARMConstantIslands::undoLRSpillRestore() {
MI->getNumExplicitOperands() == 3) {
// Create the new insn and copy the predicate from the old.
BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ } else if (MI->getOpcode() == ARM::tPUSH &&
+ MI->getOperand(2).getReg() == ARM::LR &&
+ MI->getNumExplicitOperands() == 3) {
+ // Just remove the push.
MI->eraseFromParent();
MadeChange = true;
}
@@ -1792,13 +1826,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
Bits = 11;
Scale = 2;
break;
- case ARM::t2Bcc: {
+ case ARM::t2Bcc:
NewOpc = ARM::tBcc;
Bits = 8;
Scale = 2;
break;
}
- }
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
@@ -1983,6 +2016,54 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
&*MBB->begin() == CPEMI;
}
+static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
+ MachineInstr *JumpMI,
+ unsigned &DeadSize) {
+ // Remove a dead add between the LEA and JT, which used to compute EntryReg,
+ // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
+ // and is not clobbered / used.
+ MachineInstr *RemovableAdd = nullptr;
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+
+ // Find the last ADD to set EntryReg
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (I->getOpcode() == ARM::t2ADDrs && I->getOperand(0).getReg() == EntryReg)
+ RemovableAdd = &*I;
+ }
+
+ if (!RemovableAdd)
+ return;
+
+ // Ensure EntryReg is not clobbered or used.
+ MachineBasicBlock::iterator J(RemovableAdd);
+ for (++J; &*J != JumpMI; ++J) {
+ for (unsigned K = 0, E = J->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = J->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == EntryReg)
+ return;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd);
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+}
+
+static bool registerDefinedBetween(unsigned Reg,
+ MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI) {
+ for (auto I = From; I != To; ++I)
+ if (I->modifiesRegister(Reg, TRI))
+ return true;
+ return false;
+}
+
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
@@ -2060,6 +2141,12 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
IdxReg = Shift->getOperand(2).getReg();
unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
+ // It's important that IdxReg is live until the actual TBB/TBH. Most of
+ // the range is checked later, but the LEA might still clobber it and not
+ // actually get removed.
+ if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI))
+ continue;
+
MachineInstr *Load = User.MI->getNextNode();
if (Load->getOpcode() != ARM::tLDRr)
continue;
@@ -2069,6 +2156,16 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
continue;
// If we're in PIC mode, there should be another ADD following.
+ auto *TRI = STI->getRegisterInfo();
+
+ // %base cannot be redefined after the load as it will appear before
+ // TBB/TBH like:
+ // %base =
+ // %base =
+ // tBB %base, %idx
+ if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI))
+ continue;
+
if (isPositionIndependentOrROPI) {
MachineInstr *Add = Load->getNextNode();
if (Add->getOpcode() != ARM::tADDrr ||
@@ -2078,22 +2175,26 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
continue;
if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
continue;
-
+ if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI))
+ // IdxReg gets redefined in the middle of the sequence.
+ continue;
Add->eraseFromParent();
DeadSize += 2;
} else {
if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
continue;
+ if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI))
+ // IdxReg gets redefined in the middle of the sequence.
+ continue;
}
-
-
+
// Now safe to delete the load and lsl. The LEA will be removed later.
CanDeleteLEA = true;
Shift->eraseFromParent();
Load->eraseFromParent();
DeadSize += 4;
}
-
+
DEBUG(dbgs() << "Shrink JT: " << *MI);
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
@@ -2117,7 +2218,10 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
NewJTMI->getOperand(0).setReg(ARM::PC);
NewJTMI->getOperand(0).setIsKill(false);
- if (CanDeleteLEA) {
+ if (CanDeleteLEA) {
+ if (isThumb2)
+ RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize);
+
User.MI->eraseFromParent();
DeadSize += isThumb2 ? 4 : 2;
@@ -2238,13 +2342,11 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
if (isThumb2)
BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
.addMBB(BB)
- .addImm(ARMCC::AL)
- .addReg(0);
+ .add(predOps(ARMCC::AL));
else
BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
.addMBB(BB)
- .addImm(ARMCC::AL)
- .addReg(0);
+ .add(predOps(ARMCC::AL));
// Update internal data structures to account for the newly inserted MBB.
MF->RenumberBlocks(NewBB);
@@ -2256,3 +2358,12 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
++NumJTInserted;
return NewBB;
}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME,
+ false, false)
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 2d16028..9705c8b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,13 +13,17 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -44,7 +48,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
AddCurrentAddress(addCurrentAddress) {}
-ARMConstantPoolValue::~ARMConstantPoolValue() {}
+ARMConstantPoolValue::~ARMConstantPoolValue() = default;
StringRef ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
@@ -94,9 +98,11 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
return false;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
+#endif
void ARMConstantPoolValue::print(raw_ostream &O) const {
if (Modifier) O << "(" << getModifierText() << ")";
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 5f61832..61c5215 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -14,10 +14,11 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstddef>
+#include <string>
+#include <vector>
namespace llvm {
@@ -29,6 +30,7 @@ class LLVMContext;
class MachineBasicBlock;
namespace ARMCP {
+
enum ARMCPKind {
CPValue,
CPExtSymbol,
@@ -47,7 +49,8 @@ namespace ARMCP {
SECREL, /// Section Relative (Windows TLS)
SBREL, /// Static Base Relative (RWPI)
};
-}
+
+} // end namespace ARMCP
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
/// represent PC-relative displacement between the address of the load
@@ -169,9 +172,11 @@ public:
const GlobalValue *getGV() const;
const BlockAddress *getBlockAddress() const;
+
const GlobalVariable *getPromotedGlobal() const {
return dyn_cast_or_null<GlobalVariable>(GVar);
}
+
const Constant *getPromotedGlobalInit() const {
return CVal;
}
@@ -186,6 +191,7 @@ public:
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
void print(raw_ostream &O) const override;
+
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() ||
APV->isPromotedGlobal();
@@ -267,6 +273,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index baa4e03..46d8f0d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,6 +19,7 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,6 +31,7 @@
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "arm-pseudo"
@@ -97,9 +99,9 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addOperand(MO);
+ UseMI.add(MO);
else
- DefMI.addOperand(MO);
+ DefMI.add(MO);
}
}
@@ -415,14 +417,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
// has an extra operand that is a use of the super-register. Record the
@@ -432,15 +434,15 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
SrcOpIdx = OpIdx++;
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the super-register source operand used for double-spaced subregs over
// to the new instruction as an implicit operand.
if (SrcOpIdx != 0) {
MachineOperand MO = MI.getOperand(SrcOpIdx);
MO.setImplicit(true);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
@@ -467,14 +469,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
@@ -490,8 +492,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
@@ -549,14 +551,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
}
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Grab the super-register source.
MachineOperand MO = MI.getOperand(OpIdx++);
@@ -579,12 +581,12 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
OpIdx += 1;
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the super-register source to be an implicit source.
MO.setImplicit(true);
- MIB.addOperand(MO);
+ MIB.add(MO);
if (TableEntry->IsLoad)
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
@@ -605,9 +607,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned OpIdx = 0;
// Transfer the destination register operand.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
if (IsExt)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
@@ -616,11 +618,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MIB.addReg(D0);
// Copy the other source register operand.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add an implicit kill and use for the super-reg.
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
@@ -696,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16 = HI16.addImm(SOImmValV2);
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- LO16.addImm(Pred).addReg(PredReg).addReg(0);
- HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
+ HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
return;
@@ -755,14 +757,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MI.eraseFromParent();
}
-static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MBB->addLiveIn(*I);
-}
-
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
-/// possible. This only gets used at -O0 so we don't care about efficiency of the
-/// generated code.
+/// possible. This only gets used at -O0 so we don't care about efficiency of
+/// the generated code.
bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned LdrexOp, unsigned StrexOp,
@@ -771,16 +768,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
bool IsThumb = STI->isThumb();
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- MachineOperand &Dest = MI.getOperand(0);
- unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
-
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ const MachineOperand &Dest = MI.getOperand(0);
+ unsigned TempReg = MI.getOperand(1).getReg();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; However undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ unsigned NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -793,33 +788,30 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
if (UxtOp) {
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg())
- .addReg(Desired.getReg(), RegState::Kill);
+ BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
+ .addReg(DesiredReg, RegState::Kill);
if (!IsThumb)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
// .Lloadcmp:
// ldrex rDest, [rAddr]
// cmp rDest, rDesired
// bne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
- MIB.addReg(Addr.getReg());
+ MIB.addReg(AddrReg);
if (LdrexOp == ARM::t2LDREX)
MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
- AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
- .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .addOperand(Desired));
+ BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+ .addReg(DesiredReg)
+ .add(predOps(ARMCC::AL));
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
BuildMI(LoadCmpBB, DL, TII->get(Bcc))
.addMBB(DoneBB)
@@ -829,25 +821,21 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
- // strex rStatus, rNew, [rAddr]
- // cmp rStatus, #0
+ // strex rTempReg, rNew, [rAddr]
+ // cmp rTempReg, #0
// bne .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
-
- MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
- MIB.addOperand(New);
- MIB.addOperand(Addr);
+ MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
+ .addReg(NewReg)
+ .addReg(AddrReg);
if (StrexOp == ARM::t2STREX)
MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
- AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
- .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(TempReg, RegState::Kill)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
.addMBB(LoadCmpBB)
.addImm(ARMCC::NE)
@@ -857,12 +845,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
@@ -889,20 +889,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
- unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
+ unsigned TempReg = MI.getOperand(1).getReg();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; However undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ MachineOperand New = MI.getOperand(4);
+ New.setIsKill(false);
unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
- unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
- unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
-
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
+ unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -916,28 +915,23 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// .Lloadcmp:
// ldrexd rDestLo, rDestHi, [rAddr]
// cmp rDestLo, rDesiredLo
- // sbcs rStatus<dead>, rDestHi, rDesiredHi
+ // sbcs rTempReg<dead>, rDestHi, rDesiredHi
// bne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
- MIB.addReg(Addr.getReg());
- AddDefaultPred(MIB);
+ MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
- AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
- .addReg(DestLo, getKillRegState(Dest.isDead()))
- .addReg(DesiredLo, getKillRegState(Desired.isDead())));
+ BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(DestLo, getKillRegState(Dest.isDead()))
+ .addReg(DesiredLo)
+ .add(predOps(ARMCC::AL));
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
.addReg(DestHi, getKillRegState(Dest.isDead()))
- .addReg(DesiredHi, getKillRegState(Desired.isDead()))
+ .addReg(DesiredHi)
.addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
@@ -949,23 +943,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
- // strexd rStatus, rNewLo, rNewHi, [rAddr]
- // cmp rStatus, #0
+ // strexd rTempReg, rNewLo, rNewHi, [rAddr]
+ // cmp rTempReg, #0
// bne .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
- MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
+ MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
- MIB.addOperand(Addr);
- AddDefaultPred(MIB);
+ MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
- AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
- .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(TempReg, RegState::Kill)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
.addMBB(LoadCmpBB)
.addImm(ARMCC::NE)
@@ -975,12 +965,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
@@ -1026,7 +1028,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add the default predicate in Thumb mode.
if (STI->isThumb())
- MIB.addImm(ARMCC::AL).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
@@ -1047,9 +1049,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4));
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4));
MI.eraseFromParent();
return true;
@@ -1059,10 +1061,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1070,11 +1072,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm())
- .addImm(MI.getOperand(4).getImm()) // 'pred'
- .addOperand(MI.getOperand(5))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .add(MI.getOperand(5))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1082,12 +1084,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3))
- .addImm(MI.getOperand(4).getImm())
- .addImm(MI.getOperand(5).getImm()) // 'pred'
- .addOperand(MI.getOperand(6))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .add(MI.getOperand(6))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1097,9 +1099,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4));
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
@@ -1108,10 +1110,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1121,10 +1123,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1143,11 +1145,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm())
- .addImm(MI.getOperand(4).getImm()) // 'pred'
- .addOperand(MI.getOperand(5))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .add(MI.getOperand(5))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
}
@@ -1187,10 +1189,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
"bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign - 1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
}
@@ -1201,24 +1204,25 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
// These are just fancy MOVs instructions.
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
- ARM_AM::lsr : ARM_AM::asr),
- 1)))
- .addReg(ARM::CPSR, RegState::Define);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(
+ (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
return true;
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
- .addReg(0);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -1241,18 +1245,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (!Thumb)
MIB.addImm(0);
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
if (Thumb)
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB.addReg(Reg, RegState::Kill);
} else {
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tBL : ARM::BL));
if (Thumb)
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB.addExternalSymbol("__aeabi_read_tp", 0);
}
@@ -1268,15 +1272,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(NewLdOpc), DstReg)
- .addOperand(MI.getOperand(1)));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
+ .add(MI.getOperand(1))
+ .add(predOps(ARMCC::AL));
MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::tPICADD))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addOperand(MI.getOperand(2));
+ MachineInstrBuilder MIB2 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .add(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
@@ -1319,7 +1323,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (IsARM)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (IsPIC) {
MachineInstrBuilder MIB =
@@ -1329,7 +1333,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addImm(ARMPCLabelIndex);
if (IsARM)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
MI.eraseFromParent();
@@ -1368,7 +1372,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg).addImm(LabelId);
if (isARM) {
- AddDefaultPred(MIB3);
+ MIB3.add(predOps(ARMCC::AL));
if (Opcode == ARM::MOV_ga_pcrel_ldr)
MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
@@ -1388,9 +1392,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
.addReg(ARM::LR)
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addReg(ARM::CPSR, RegState::Undef);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
@@ -1407,11 +1411,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
// Copy the source register.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add the destination operands (D subregs).
unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
@@ -1438,11 +1442,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
// Copy the destination register.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add the source operands (D subregs).
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index df4dcb3..bf00ef6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
@@ -21,30 +22,61 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -54,24 +86,22 @@ namespace {
enum {
RegBase,
FrameIndexBase
- } BaseType;
+ } BaseType = RegBase;
union {
unsigned Reg;
int FI;
} Base;
- int Offset;
+ int Offset = 0;
// Innocuous defaults for our address.
- Address()
- : BaseType(RegBase), Offset(0) {
- Base.Reg = 0;
- }
+ Address() {
+ Base.Reg = 0;
+ }
} Address;
class ARMFastISel final : public FastISel {
-
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
@@ -99,8 +129,9 @@ class ARMFastISel final : public FastISel {
Context = &funcInfo.Fn->getContext();
}
- // Code from FastISel.cpp.
private:
+ // Code from FastISel.cpp.
+
unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
@@ -117,18 +148,18 @@ class ARMFastISel final : public FastISel {
uint64_t Imm);
// Backend specific FastISel code.
- private:
+
bool fastSelectInstruction(const Instruction *I) override;
unsigned fastMaterializeConstant(const Constant *C) override;
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
bool fastLowerArguments() override;
- private:
+
#include "ARMGenFastISel.inc"
// Instruction selection routines.
- private:
+
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
@@ -151,12 +182,12 @@ class ARMFastISel final : public FastISel {
bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
- private:
+
bool isPositionIndependent() const;
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt);
+ bool isZExt, bool isEquality);
bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
@@ -179,7 +210,7 @@ class ARMFastISel final : public FastISel {
const TargetLowering *getTargetLowering() { return &TLI; }
// Call handling routines.
- private:
+
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
bool Return,
bool isVarArg);
@@ -198,7 +229,7 @@ class ARMFastISel final : public FastISel {
bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
// OptionalDef handling routines.
- private:
+
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
@@ -219,8 +250,7 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
if (MO.getReg() == ARM::CPSR)
*CPSR = true;
@@ -236,8 +266,8 @@ bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
AFI->isThumb2Function())
return MI->isPredicable();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
- if (MCID.OpInfo[i].isPredicate())
+ for (const MCOperandInfo &opInfo : MCID.operands())
+ if (opInfo.isPredicate())
return true;
return false;
@@ -256,17 +286,13 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyways.
if (isARMNEONPred(MI))
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
// defines CPSR. All other OptionalDefines in ARM are the CCR register.
bool CPSR = false;
- if (DefinesOptionalPredicate(MI, &CPSR)) {
- if (CPSR)
- AddDefaultT1CC(MIB);
- else
- AddDefaultCC(MIB);
- }
+ if (DefinesOptionalPredicate(MI, &CPSR))
+ MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
return MIB;
}
@@ -434,7 +460,6 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
-
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
@@ -739,7 +764,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -971,7 +996,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// Create the base instruction, then add the operands.
if (allocReg)
ResultReg = createResultReg(RC);
- assert (ResultReg > 255 && "Expected an allocated virtual register.");
+ assert(ResultReg > 255 && "Expected an allocated virtual register.");
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
@@ -1216,7 +1241,6 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// behavior.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
-
// Get the compare predicate.
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
@@ -1231,7 +1255,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
@@ -1318,14 +1343,16 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt) {
+ bool isZExt, bool isEquality) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
- if (isFloat && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ return false;
+
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
return false;
// Check to see if the 2nd operand is a constant that we can encode directly
@@ -1364,10 +1391,18 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
// TODO: Verify compares.
case MVT::f32:
isICmp = false;
- CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ // Equality comparisons shouldn't raise Invalid on uordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
+ else
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
isICmp = false;
+ // Equality comparisons shouldn't raise Invalid on uordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
+ else
CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
case MVT::i1:
@@ -1444,7 +1479,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
// Now set a register based on the comparison. Explicitly set the predicates
@@ -1466,7 +1502,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
bool ARMFastISel::SelectFPExt(const Instruction *I) {
// Make sure we have VFP and that we're extending float to double.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() ||
@@ -1485,7 +1521,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
// Make sure we have VFP and that we're truncating double to float.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
Value *V = I->getOperand(0);
if (!(I->getType()->isFloatTy() &&
@@ -1536,7 +1572,8 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
- else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
+ else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+ Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
@@ -1561,7 +1598,8 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
- else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
+ else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+ Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
// f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
@@ -1596,7 +1634,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
bool UseImm = false;
bool isNegativeImm = false;
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
- assert (VT == MVT::i32 && "Expecting an i32.");
+ assert(VT == MVT::i32 && "Expecting an i32.");
Imm = (int)ConstInt->getValue().getZExtValue();
if (Imm < 0) {
isNegativeImm = true;
@@ -1663,7 +1701,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
// If we have integer div support we should have selected this automagically.
// In case we have a real miss go ahead and return false and we'll pick
// it up later.
- if (Subtarget->hasDivide()) return false;
+ if (Subtarget->hasDivideInThumbMode())
+ return false;
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1765,8 +1804,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
Type *Ty = I->getType();
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ return false;
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
return false;
unsigned Opc;
@@ -1908,7 +1948,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AdjStackDown))
- .addImm(NumBytes));
+ .addImm(NumBytes).addImm(0));
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -1926,16 +1966,16 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
- assert (Arg != 0 && "Failed to emit a sext");
+ assert(Arg != 0 && "Failed to emit a sext");
ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
- // Intentional fall-through. Handle AExt and ZExt.
+ // Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
- assert (Arg != 0 && "Failed to emit a zext");
+ assert(Arg != 0 && "Failed to emit a zext");
ArgVT = DestVT;
break;
}
@@ -1960,6 +2000,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
assert(VA.getLocVT() == MVT::f64 &&
"Custom lowering for v2f64 args not available");
+ // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
CCValAssign &NextVA = ArgLocs[++i];
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
@@ -2131,8 +2172,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(RetOpc));
AddOptionalDefs(MIB);
- for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
- MIB.addReg(RetRegs[i], RegState::Implicit);
+ for (unsigned R : RetRegs)
+ MIB.addReg(R, RegState::Implicit);
return true;
}
@@ -2192,8 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
ArgRegs.reserve(I->getNumOperands());
ArgVTs.reserve(I->getNumOperands());
ArgFlags.reserve(I->getNumOperands());
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *Op = I->getOperand(i);
+ for (Value *Op : I->operands()) {
unsigned Arg = getRegForValue(Op);
if (Arg == 0) return false;
@@ -2230,15 +2270,15 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
DbgLoc, TII.get(CallOpc));
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (Subtarget->genLongCalls())
MIB.addReg(CalleeReg);
else
MIB.addExternalSymbol(TLI.getLibcallName(Call));
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (unsigned R : RegArgs)
+ MIB.addReg(R, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2311,19 +2351,19 @@ bool ARMFastISel::SelectCall(const Instruction *I,
break;
ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ unsigned ArgIdx = i - CS.arg_begin();
+ if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) ||
- CS.paramHasAttr(AttrInd, Attribute::SwiftError) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
+ CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
+ CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
+ CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
+ CS.paramHasAttr(ArgIdx, Attribute::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2373,7 +2413,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
@@ -2382,8 +2422,8 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (unsigned R : RegArgs)
+ MIB.addReg(R, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2418,7 +2458,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
else if (Len >= 2)
VT = MVT::i16;
else {
- assert (Len == 1 && "Expected a length of 1!");
+ assert(Len == 1 && "Expected a length of 1!");
VT = MVT::i8;
}
} else {
@@ -2433,9 +2473,9 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
bool RV;
unsigned ResultReg;
RV = ARMEmitLoad(VT, ResultReg, Src);
- assert (RV == true && "Should be able to handle this load.");
+ assert(RV && "Should be able to handle this load.");
RV = ARMEmitStore(VT, ResultReg, Dest);
- assert (RV == true && "Should be able to handle this store.");
+ assert(RV && "Should be able to handle this store.");
(void)RV;
unsigned Size = VT.getSizeInBits()/8;
@@ -2687,9 +2727,11 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
if (setsCPSR)
MIB.addReg(ARM::CPSR, RegState::Define);
SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
- AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
+ MIB.addReg(SrcReg, isKill * RegState::Kill)
+ .addImm(ImmEnc)
+ .add(predOps(ARMCC::AL));
if (hasS)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
// Second instruction consumes the first's result.
SrcReg = ResultReg;
}
@@ -2779,7 +2821,6 @@ bool ARMFastISel::SelectShift(const Instruction *I,
// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
-
switch (I->getOpcode()) {
case Instruction::Load:
return SelectLoad(I);
@@ -2849,6 +2890,7 @@ bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
}
namespace {
+
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
@@ -2865,7 +2907,8 @@ const struct FoldableLoadExtendsStruct {
{ { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
{ { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
};
-}
+
+} // end anonymous namespace
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
@@ -2888,13 +2931,12 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
bool Found = false;
bool isZExt;
- for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
- i != e; ++i) {
- if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
- (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
- MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+ for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
+ if (FLE.Opc[isThumb2] == MI->getOpcode() &&
+ (uint64_t)FLE.ExpectedImm == Imm &&
+ MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
Found = true;
- isZExt = FoldableLoadExtends[i].isZExt;
+ isZExt = FLE.isZExt;
}
}
if (!Found) return false;
@@ -2933,7 +2975,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
.addConstantPoolIndex(Idx);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Fix the address by adding pc.
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -2944,7 +2986,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
if (!Subtarget->isThumb())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (UseGOT_PREL && Subtarget->isThumb()) {
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -2981,20 +3023,18 @@ bool ARMFastISel::fastLowerArguments() {
// Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
// which are passed in r0 - r3.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++Idx) {
- if (Idx > 4)
+ for (const Argument &Arg : F->args()) {
+ if (Arg.getArgNo() >= 4)
return false;
- if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
- F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
- F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
- F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
- F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ if (Arg.hasAttribute(Attribute::InReg) ||
+ Arg.hasAttribute(Attribute::StructRet) ||
+ Arg.hasAttribute(Attribute::SwiftSelf) ||
+ Arg.hasAttribute(Attribute::SwiftError) ||
+ Arg.hasAttribute(Attribute::ByVal))
return false;
- Type *ArgTy = I->getType();
+ Type *ArgTy = Arg.getType();
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
@@ -3010,16 +3050,14 @@ bool ARMFastISel::fastLowerArguments() {
}
}
-
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
const TargetRegisterClass *RC = &ARM::rGPRRegClass;
- Idx = 0;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++Idx) {
- unsigned SrcReg = GPRArgRegs[Idx];
+ for (const Argument &Arg : F->args()) {
+ unsigned ArgNo = Arg.getArgNo();
+ unsigned SrcReg = GPRArgRegs[ArgNo];
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
@@ -3028,13 +3066,14 @@ bool ARMFastISel::fastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- updateValueMap(&*I, ResultReg);
+ updateValueMap(&Arg, ResultReg);
}
return true;
}
namespace llvm {
+
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
@@ -3042,4 +3081,5 @@ namespace llvm {
return nullptr;
}
-}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
index 0c910ab..8c0df4c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFeatures.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
@@ -19,10 +19,10 @@
namespace llvm {
template<typename InstrType> // could be MachineInstr or MCInst
-bool IsCPSRDead(InstrType *Instr);
+bool IsCPSRDead(const InstrType *Instr);
template<typename InstrType> // could be MachineInstr or MCInst
-inline bool isV8EligibleForIT(InstrType *Instr) {
+inline bool isV8EligibleForIT(const InstrType *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index c72db8a..16b54e8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -16,19 +16,49 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "arm-frame-lowering"
@@ -180,6 +210,7 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF,
}
namespace {
+
struct StackAdjustingInsts {
struct InstInfo {
MachineBasicBlock::iterator I;
@@ -196,7 +227,8 @@ struct StackAdjustingInsts {
}
void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
- auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
+ auto Info =
+ llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
assert(Info != Insts.end() && "invalid sp adjusting instruction");
Info->SPAdjust += ExtraBytes;
}
@@ -219,7 +251,8 @@ struct StackAdjustingInsts {
}
}
};
-}
+
+} // end anonymous namespace
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
@@ -252,38 +285,55 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
// lsr Reg, Reg, log2(Alignment)
// lsl Reg, Reg, log2(Alignment)
if (CanUseBFC) {
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(~AlignMask));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask)
+ .add(predOps(ARMCC::AL));
} else if (AlignMask <= 255) {
- AddDefaultCC(
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(AlignMask)));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(AlignMask)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
} else {
assert(!MustBeSingleInstruction &&
"Shouldn't call emitAligningInstructions demanding a single "
"instruction to be emitted for large stack alignment for a target "
"without BFC.");
- AddDefaultCC(AddDefaultPred(
- BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
- AddDefaultCC(AddDefaultPred(
- BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
} else {
// Since this is only reached for Thumb-2 targets, the BFC instruction
// should always be available.
assert(CanUseBFC);
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(~AlignMask));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask)
+ .add(predOps(ARMCC::AL));
}
}
+/// We need the offset of the frame pointer relative to other MachineFrameInfo
+/// offsets which are encoded relative to SP at function begin.
+/// See also emitPrologue() for how the FP is set up.
+/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
+/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
+/// this to produce a conservative estimate that we check in an assert() later.
+static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+ // This is a conservative estimation: Assume the frame pointer being r7 and
+ // pc("r15") up to r8 getting spilled before (= 8 registers).
+ return -AFI.getArgRegsSaveSize() - (8 * 4);
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -394,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
- FramePtrOffsetInPush =
- MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+ assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset &&
+ "Max FP estimation is wrong");
+ FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -448,9 +500,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
uint32_t NumWords = NumBytes >> 2;
if (NumWords < 65536)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
else
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
.addImm(NumWords)
@@ -462,10 +515,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__chkstk")
- .addReg(ARM::R4, RegState::Implicit)
- .setMIFlags(MachineInstr::FrameSetup);
+ .add(predOps(ARMCC::AL))
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
case CodeModel::JITDefault:
@@ -474,18 +527,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::R12, RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit)
- .setMIFlags(MachineInstr::FrameSetup);
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
- ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
NumBytes = 0;
}
@@ -657,12 +711,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// -- out lower bits in r4
// mov sp, r4
// FIXME: It will be better just to find spare register here.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill)
+ .add(predOps(ARMCC::AL));
emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
false);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
AFI->setShouldRestoreSPFromFP(true);
@@ -675,14 +731,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: Clarify FrameSetup flags here.
if (RegInfo->hasBasePointer(MF)) {
if (isARM)
- BuildMI(MBB, MBBI, dl,
- TII.get(ARM::MOVr), RegInfo->getBaseRegister())
- .addReg(ARM::SP)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- RegInfo->getBaseRegister())
- .addReg(ARM::SP));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
}
// If the frame has variable sized objects then the epilogue must restore
@@ -757,19 +813,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(ARM::R4));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
}
} else {
// Thumb2 or ARM.
if (isARM)
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(FramePtr));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL));
}
} else if (NumBytes &&
!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
@@ -829,7 +887,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
if (RegInfo->needsStackRealignment(MF)) {
- assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ assert(hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = RegInfo->getFrameRegister(MF);
Offset = FPOffset;
@@ -910,8 +968,9 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
- bool isLiveIn = MF.getRegInfo().isLiveIn(Reg);
- if (!isLiveIn)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool isLiveIn = MRI.isLiveIn(Reg);
+ if (!isLiveIn && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
// If NoGap is true, push consecutive registers and then leave the rest
// for other instructions. e.g.
@@ -936,18 +995,19 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
});
if (Regs.size() > 1 || StrOpc== 0) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
- .addReg(ARM::SP).setMIFlags(MIFlags));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
} else if (Regs.size() == 1) {
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
- ARM::SP)
- .addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(ARM::SP).setMIFlags(MIFlags)
- .addImm(-4);
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP)
+ .setMIFlags(MIFlags)
+ .addImm(-4)
+ .add(predOps(ARMCC::AL));
}
Regs.clear();
@@ -1027,9 +1087,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
});
if (Regs.size() > 1 || LdrOpc == 0) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
- .addReg(ARM::SP));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet && MI != MBB.end()) {
@@ -1053,7 +1113,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
} else
MIB.addImm(4);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
Regs.clear();
@@ -1114,9 +1174,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// sub r4, sp, #numregs * 8
// The immediate is <= 64, so it doesn't need any special encoding.
unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::SP)
- .addImm(8 * NumAlignedDPRCS2Regs)));
+ BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
// We must set parameter MustBeSingleInstruction to true, since
@@ -1132,10 +1194,10 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// Leave r4 live, it is used below.
Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
- .addReg(ARM::R4);
- MIB = AddDefaultPred(MIB);
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
if (!isThumb)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
// Now spill NumAlignedDPRCS2Regs registers starting from d8.
// r4 holds the stack slot address.
@@ -1147,11 +1209,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
- ARM::R4)
- .addReg(ARM::R4, RegState::Kill).addImm(16)
- .addReg(NextReg)
- .addReg(SupReg, RegState::ImplicitKill));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1165,9 +1228,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
- .addReg(ARM::R4).addImm(16).addReg(NextReg)
- .addReg(SupReg, RegState::ImplicitKill));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1177,8 +1243,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
- .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(SupReg)
+ .add(predOps(ARMCC::AL));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
@@ -1187,9 +1256,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs) {
MBB.addLiveIn(NextReg);
// vstr.64 uses addrmode5 which has an offset scale of 4.
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
- .addReg(NextReg)
- .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4)
+ .addImm((NextReg - R4BaseReg) * 2)
+ .add(predOps(ARMCC::AL));
}
// The last spill instruction inserted should kill the scratch register r4.
@@ -1254,8 +1325,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addFrameIndex(D8SpillFI).addImm(0)));
+ BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
// Now restore NumAlignedDPRCS2Regs registers starting from d8.
unsigned NextReg = ARM::D8;
@@ -1264,10 +1338,12 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
- .addReg(ARM::R4, RegState::Define)
- .addReg(ARM::R4, RegState::Kill).addImm(16)
- .addReg(SupReg, RegState::ImplicitDefine));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1280,9 +1356,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
- .addReg(ARM::R4).addImm(16)
- .addReg(SupReg, RegState::ImplicitDefine));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1291,16 +1369,20 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
- .addReg(ARM::R4).addImm(16));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4)
+ .addImm(16)
+ .add(predOps(ARMCC::AL));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
// Finally, use a vanilla vldr.64 for the remaining odd register.
if (NumAlignedDPRCS2Regs)
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
- .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4)
+ .addImm(2 * (NextReg - R4BaseReg))
+ .add(predOps(ARMCC::AL));
// Last store kills r4.
std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
@@ -1633,24 +1715,38 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// worth the effort and added fragility?
unsigned EstimatedStackSize =
MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
- if (hasFP(MF)) {
+
+ // Determine biggest (positive) SP offset in MachineFrameInfo.
+ int MaxFixedOffset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
+ MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
+ }
+
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
if (AFI->hasStackFrame())
EstimatedStackSize += 4;
} else {
// If FP is not used, SP will be used to access arguments, so count the
// size of arguments into the estimation.
- EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize();
+ EstimatedStackSize += MaxFixedOffset;
}
EstimatedStackSize += 16; // For possible paddings.
- bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
- MFI.hasVarSizedObjects() ||
- (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI);
+ bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
+ MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
+                         // For large argument stacks fp relative addressing may overflow.
+ (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
bool ExtraCSSpill = false;
- if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ if (BigFrameOffsets ||
+ !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
- if (hasFP(MF)) {
+ if (HasFP) {
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
@@ -1658,11 +1754,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
LRSpilled = true;
NumGPRSpills++;
- auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
+ auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
}
- auto FPPos = find(UnspilledCS1GPRs, FramePtr);
+ auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
if (FPPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(FPPos);
NumGPRSpills++;
@@ -1721,7 +1817,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// r7 can be used if it is not being used as the frame pointer.
- if (!hasFP(MF)) {
+ if (!HasFP) {
if (SavedRegs.test(ARM::R7)) {
--RegDeficit;
DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
@@ -1773,7 +1869,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
NumGPRSpills++;
CS1Spilled = true;
ExtraCSSpill = true;
- UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
+ UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
if (Reg == ARM::LR)
LRSpilled = true;
}
@@ -1786,7 +1882,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
- LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR);
+ LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
@@ -1831,7 +1927,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments also, even when the frame itself is small.
- if (BigStack && !ExtraCSSpill) {
+ if (BigFrameOffsets && !ExtraCSSpill) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign / 4;
@@ -1865,10 +1961,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
assert(RS && "Register scavenging not provided");
- const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ unsigned Size = TRI->getSpillSize(RC);
+ unsigned Align = TRI->getSpillAlignment(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
}
}
}
@@ -1890,7 +1986,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1908,14 +2004,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
ARMCC::CondCodes Pred =
(PIdx == -1) ? ARMCC::AL
: (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
+ unsigned PredReg = TII.getFramePred(Old);
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old.getOperand(2).getReg();
emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
Pred, PredReg);
} else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old.getOperand(3).getReg();
assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
Pred, PredReg);
@@ -2081,12 +2174,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// SR1: Scratch Register #1
// push {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
- .addReg(ScratchReg0).addReg(ScratchReg1);
+ BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
- .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
- .addReg(ScratchReg0).addReg(ScratchReg1);
+ BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Emit the relevant DWARF information about the change in stack pointer as
@@ -2106,21 +2204,29 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// mov SR1, sp
if (Thumb) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
- .addReg(ARM::SP));
+ BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
} else if (CompareStackPointer) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
- .addReg(ARM::SP)).addReg(0);
+ BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// sub SR1, sp, #StackSize
if (!CompareStackPointer && Thumb) {
- AddDefaultPred(
- AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
- .addReg(ScratchReg1).addImm(AlignedStackSize));
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
} else if (!CompareStackPointer) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
- .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+ .addReg(ARM::SP)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
if (Thumb && ST->isThumb1Only()) {
@@ -2131,21 +2237,25 @@ void ARMFrameLowering::adjustForSegmentedStacks(
unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
// ldr SR0, [pc, offset(STACK_LIMIT)]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
- .addConstantPoolIndex(CPI));
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+ .addConstantPoolIndex(CPI)
+ .add(predOps(ARMCC::AL));
// ldr SR0, [SR0]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
- .addReg(ScratchReg0).addImm(0));
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
+ .addReg(ScratchReg0)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
} else {
// Get TLS base address from the coprocessor
// mrc p15, #0, SR0, c13, c0, #3
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
- .addImm(15)
- .addImm(0)
- .addImm(13)
- .addImm(0)
- .addImm(3));
+ BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+ .addImm(15)
+ .addImm(0)
+ .addImm(13)
+ .addImm(0)
+ .addImm(3)
+ .add(predOps(ARMCC::AL));
// Use the last tls slot on android and a private field of the TCP on linux.
assert(ST->isTargetAndroid() || ST->isTargetLinux());
@@ -2153,16 +2263,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Get the stack limit from the right offset
// ldr SR0, [sr0, #4 * TlsOffset]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
- .addReg(ScratchReg0).addImm(4 * TlsOffset));
+ BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+ .addReg(ScratchReg0)
+ .addImm(4 * TlsOffset)
+ .add(predOps(ARMCC::AL));
}
// Compare stack limit with stack size requested.
// cmp SR0, SR1
Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1));
+ BuildMI(GetMBB, DL, TII.get(Opcode))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1)
+ .add(predOps(ARMCC::AL));
// This jump is taken if StackLimit < SP - stack required.
Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
@@ -2178,32 +2291,40 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Pass first argument for the __morestack by Scratch Register #0.
// The amount size of stack required
if (Thumb) {
- AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
- ScratchReg0)).addImm(AlignedStackSize));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
+ .add(condCodeOp())
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
- .addImm(AlignedStackSize)).addReg(0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// Pass second argument for the __morestack by Scratch Register #1.
// The amount size of stack consumed to save function arguments.
if (Thumb) {
- AddDefaultPred(
- AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
+ .add(condCodeOp())
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
- .addReg(0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// push {lr} - Save return address of this function.
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
.addReg(ARM::LR);
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
+ BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
.addReg(ARM::LR);
}
@@ -2220,7 +2341,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Call __morestack().
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
+ BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
+ .add(predOps(ARMCC::AL))
.addExternalSymbol("__morestack");
} else {
BuildMI(AllocMBB, DL, TII.get(ARM::BL))
@@ -2230,22 +2352,26 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// pop {lr} - Restore return address of this original function.
if (Thumb) {
if (ST->isThumb1Only()) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0);
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
- .addReg(ScratchReg0));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP)
- .addImm(4));
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .addImm(4)
+ .add(predOps(ARMCC::AL));
}
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ARM::LR);
+ BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::LR);
}
// Restore SR0 and SR1 in case of __morestack() was called.
@@ -2253,15 +2379,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// scratch registers from here.
// pop {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Update the CFA offset now that we've popped
@@ -2271,20 +2399,22 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// bx lr - Return from this function.
Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
+ BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL));
// Restore SR0 and SR1 in case of __morestack() was not called.
// pop {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Update the CFA offset now that we've popped
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 7572955..f75dd4d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -228,11 +228,6 @@ private:
const uint16_t *DOpcodes,
const uint16_t *QOpcodes = nullptr);
- /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
- /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
- /// generated to force the table registers to be consecutive.
- void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
-
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -244,11 +239,8 @@ private:
bool tryInlineAsm(SDNode *N);
- void SelectConcatVector(SDNode *N);
void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
- bool trySMLAWSMULW(SDNode *N);
-
void SelectCMP_SWAP(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -547,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue NewMulConst;
if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
HandleSDNode Handle(N);
+ SDLoc Loc(N);
replaceDAGValue(N.getOperand(1), NewMulConst);
BaseReg = Handle.getValue();
- Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
- PowerOfTwo),
- SDLoc(N), MVT::i32);
+ Opc = CurDAG->getTargetConstant(
+ ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
return true;
}
}
@@ -1866,6 +1858,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
return Opc; // If not one we handle, return it unchanged.
}
+/// Returns true if the given increment is a Constant known to be equal to the
+/// access size performed by a NEON load/store. This means the "[rN]!" form can
+/// be used.
+static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
+ auto C = dyn_cast<ConstantSDNode>(Inc);
+ return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
+}
+
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
@@ -1933,13 +1933,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if ((NumVecs <= 2) && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
- !isa<ConstantSDNode>(Inc.getNode()))
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2087,11 +2087,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
else if (NumVecs > 2 && !isVSTfixed(Opc))
Ops.push_back(Reg0);
@@ -2221,7 +2222,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
SDValue SuperReg;
@@ -2325,9 +2328,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
// fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
else if (NumVecs > 2)
@@ -2363,39 +2368,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
- unsigned Opc) {
- assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- unsigned FirstTblReg = IsExt ? 2 : 1;
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- SDValue V0 = N->getOperand(FirstTblReg + 0);
- SDValue V1 = N->getOperand(FirstTblReg + 1);
- if (NumVecs == 2)
- RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
- else {
- SDValue V2 = N->getOperand(FirstTblReg + 2);
- // If it's a vtbl3, form a quad D-register and leave the last part as
- // an undef.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
- : N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- SmallVector<SDValue, 6> Ops;
- if (IsExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
- Ops.push_back(getAL(CurDAG, dl)); // predicate
- Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
-}
-
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
@@ -2563,141 +2535,6 @@ bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
return false;
}
-static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
- bool Accumulate) {
- // For SM*WB, we need to some form of sext.
- // For SM*WT, we need to search for (sra X, 16)
- // Src1 then gets set to X.
- if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
- SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
- SignExt.getOpcode() == ISD::AssertSext) &&
- SignExt.getValueType() == MVT::i32) {
-
- *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
- Src1 = SignExt.getOperand(0);
- return true;
- }
-
- if (SignExt.getOpcode() != ISD::SRA)
- return false;
-
- ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
- if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
- return false;
-
- SDValue Op0 = SignExt.getOperand(0);
-
- // The sign extend operand for SM*WB could be generated by a shl and ashr.
- if (Op0.getOpcode() == ISD::SHL) {
- SDValue SHL = Op0;
- ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
- return false;
-
- *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
- Src1 = Op0.getOperand(0);
- return true;
- }
- *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
- Src1 = SignExt.getOperand(0);
- return true;
-}
-
-static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
- SDValue &Src1, bool Accumulate) {
- // First we look for:
- // (add (or (srl ?, 16), (shl ?, 16)))
- if (OR.getOpcode() != ISD::OR)
- return false;
-
- SDValue SRL = OR.getOperand(0);
- SDValue SHL = OR.getOperand(1);
-
- if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
- SRL = OR.getOperand(1);
- SHL = OR.getOperand(0);
- if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
- return false;
- }
-
- ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
- ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
- SHLSrc1->getZExtValue() != 16)
- return false;
-
- // The first operands to the shifts need to be the two results from the
- // same smul_lohi node.
- if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
- SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
- return false;
-
- SDNode *SMULLOHI = SRL.getOperand(0).getNode();
- if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
- SHL.getOperand(0) != SDValue(SMULLOHI, 1))
- return false;
-
- // Now we have:
- // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
- // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
- // For SMLAWB the 16-bit value will signed extended somehow.
- // For SMLAWT only the SRA is required.
-
- // Check both sides of SMUL_LOHI
- if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
- Src0 = SMULLOHI->getOperand(1);
- } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
- Accumulate)) {
- Src0 = SMULLOHI->getOperand(0);
- } else {
- return false;
- }
- return true;
-}
-
-bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
- if (!Subtarget->hasV6Ops() ||
- (Subtarget->isThumb() && !Subtarget->hasThumb2()))
- return false;
-
- SDLoc dl(N);
- SDValue Src0 = N->getOperand(0);
- SDValue Src1 = N->getOperand(1);
- SDValue A, B;
- unsigned Opc = 0;
-
- if (N->getOpcode() == ISD::ADD) {
- if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
- return false;
-
- SDValue Acc;
- if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
- Acc = Src1;
- } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
- Acc = Src0;
- } else {
- return false;
- }
- if (Opc == 0)
- return false;
-
- SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32) };
- CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
- return true;
- } else if (N->getOpcode() == ISD::OR &&
- SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
- if (Opc == 0)
- return false;
-
- SDValue Ops[] = { A, B, getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32)};
- CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
- return true;
- }
- return false;
-}
-
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
@@ -2726,15 +2563,6 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
- // The only time a CONCAT_VECTORS operation can have legal types is when
- // two 64-bit vectors are concatenated to a 128-bit vector.
- EVT VT = N->getValueType(0);
- if (!VT.is128BitVector() || N->getNumOperands() != 2)
- llvm_unreachable("unexpected CONCAT_VECTORS");
- ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
-}
-
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
@@ -2826,11 +2654,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD:
- case ISD::OR:
- if (trySMLAWSMULW(N))
- return;
- break;
case ISD::WRITE_REGISTER:
if (tryWriteRegister(N))
return;
@@ -2859,9 +2682,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *ResNode;
if (Subtarget->isThumb()) {
- SDValue Pred = getAL(CurDAG, dl);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ SDValue Ops[] = {
+ CPIdx,
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
Ops);
} else {
@@ -2875,6 +2701,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
Ops);
}
+ // Annotate the Node with memory operand information so that MachineInstr
+ // queries work properly. This e.g. gives the register allocation the
+ // required information for rematerialization.
+ MachineFunction& MF = CurDAG->getMachineFunction();
+ MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+ MemOp[0] = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
+
ReplaceNode(N, ResNode);
return;
}
@@ -3046,49 +2883,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ARMISD::VMOVRRD:
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
- N->getOperand(0), getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32)));
- return;
- case ISD::UMUL_LOHI: {
- if (Subtarget->isThumb1Only())
- break;
- if (Subtarget->isThumb()) {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(
- N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
- return;
- } else {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(N, CurDAG->getMachineNode(
- Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
- MVT::i32, MVT::i32, Ops));
- return;
- }
- }
- case ISD::SMUL_LOHI: {
- if (Subtarget->isThumb1Only())
- break;
- if (Subtarget->isThumb()) {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(
- N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
- return;
- } else {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(N, CurDAG->getMachineNode(
- Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
- MVT::i32, MVT::i32, Ops));
- return;
- }
- }
case ARMISD::UMAAL: {
unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
@@ -3099,38 +2893,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
case ARMISD::UMLAL:{
- // UMAAL is similar to UMLAL but it adds two 32-bit values to the
- // 64-bit multiplication result.
- if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
- N->getOperand(2).getOpcode() == ARMISD::ADDC &&
- N->getOperand(3).getOpcode() == ARMISD::ADDE) {
-
- SDValue Addc = N->getOperand(2);
- SDValue Adde = N->getOperand(3);
-
- if (Adde.getOperand(2).getNode() == Addc.getNode()) {
-
- ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
- ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
-
- if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
- {
- // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
- // RdLo = one operand to be added, lower 32-bits of res
- // RdHi = other operand to be added, upper 32-bits of res
- // Rn = first multiply operand
- // Rm = second multiply operand
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- Addc.getOperand(0), Addc.getOperand(1),
- getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32) };
- unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
- CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
- return;
- }
- }
- }
-
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
@@ -3281,26 +3043,23 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
int64_t Addend = -C->getSExtValue();
SDNode *Add = nullptr;
- // In T2 mode, ADDS can be better than CMN if the immediate fits in a
+ // ADDS can be better than CMN if the immediate fits in a
// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
// Outside that range we can just use a CMN which is 32-bit but has a
// 12-bit immediate range.
- if (Subtarget->isThumb2() && Addend < 1<<8) {
- SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
- } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
- // FIXME: Add T1 tADDi8 code.
- SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
- CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
- Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
- } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
- SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
- CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
- Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
+ if (Addend < 1<<8) {
+ if (Subtarget->isThumb2()) {
+ SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
+ } else {
+ unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ }
}
if (Add) {
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
@@ -3964,63 +3723,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
-
- case Intrinsic::arm_neon_vtbl2:
- SelectVTBL(N, false, 2, ARM::VTBL2);
- return;
- case Intrinsic::arm_neon_vtbl3:
- SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbl4:
- SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
- return;
-
- case Intrinsic::arm_neon_vtbx2:
- SelectVTBL(N, true, 2, ARM::VTBX2);
- return;
- case Intrinsic::arm_neon_vtbx3:
- SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbx4:
- SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
- return;
- }
- break;
- }
-
- case ARMISD::VTBL1: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
- return;
- }
- case ARMISD::VTBL2: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
-
- SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
- return;
- }
-
- case ISD::CONCAT_VECTORS:
- SelectConcatVector(N);
- return;
-
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
@@ -4127,11 +3829,10 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
- if (Flags.empty())
- return 0x2 | (int)hasDSP;
-
+static inline int getMClassFlagsMask(StringRef Flags) {
return StringSwitch<int>(Flags)
+ .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
+ // correct when flags are not permitted
.Case("g", 0x1)
.Case("nzcvq", 0x2)
.Case("nzcvqg", 0x3)
@@ -4174,7 +3875,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
}
// We know we are now handling a write so need to get the mask for the flags.
- int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
+ int Mask = getMClassFlagsMask(Flags);
// Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
// shouldn't have flags present.
@@ -4189,10 +3890,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
// The register was valid so need to put the mask in the correct place
// (the flags need to be in bits 11-10) and combine with the SYSmvalue to
// construct the operand for the instruction node.
- if (SYSmvalue < 0x4)
- return SYSmvalue | Mask << 10;
-
- return SYSmvalue;
+ return SYSmvalue | Mask << 10;
}
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
@@ -4205,7 +3903,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// The flags permitted for apsr are the same flags that are allowed in
// M class registers. We get the flag value and then shift the flags into
// the correct place to combine with the mask.
- Mask = getMClassFlagsMask(Flags, true);
+ Mask = getMClassFlagsMask(Flags);
if (Mask == -1)
return -1;
return Mask << 2;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 0f84a23..27dda93 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,46 +13,100 @@
//===----------------------------------------------------------------------===//
#include "ARMISelLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
-#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <string>
+#include <tuple>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "arm-isel"
@@ -72,7 +126,7 @@ static cl::opt<bool> EnableConstpoolPromotion(
"arm-promote-constant", cl::Hidden,
cl::desc("Enable / disable promotion of unnamed_addr constants into "
"constant pools"),
- cl::init(true));
+ cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
"arm-promote-constant-max-size", cl::Hidden,
cl::desc("Maximum size of constant to promote into a constant pool"),
@@ -82,21 +136,6 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
-namespace {
- class ARMCCState : public CCState {
- public:
- ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
- ParmContext PC)
- : CCState(CC, isVarArg, MF, locs, C) {
- assert(((PC == Call) || (PC == Prologue)) &&
- "ARMCCState users must specify whether their context is call"
- "or prologue generation.");
- CallOrPrologue = PC;
- }
- };
-}
-
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -162,7 +201,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
}
@@ -433,9 +472,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->isTargetWatchOS() ||
- (Subtarget->isTargetIOS() &&
- !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
+ if (Subtarget->isTargetMachO() &&
+ !(Subtarget->isTargetIOS() &&
+ Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
@@ -545,7 +584,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
- setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
@@ -563,7 +601,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
- setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
@@ -580,7 +617,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
- setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -685,10 +721,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
- // ARM and Thumb2 support UMLAL/SMLAL.
- if (!Subtarget->isThumb1Only())
- setTargetDAGCombine(ISD::ADDC);
-
if (Subtarget->isFPOnlySP()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
@@ -707,7 +739,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FPOWI, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
@@ -786,14 +817,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
- if (!Subtarget->isThumb1Only()) {
- // FIXME: We should do this for Thumb1 as well.
- setOperationAction(ISD::ADDC, MVT::i32, Custom);
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBC, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
- }
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@@ -820,7 +849,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
@@ -828,7 +857,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
+ if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
@@ -1305,6 +1334,16 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
+ case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
+ case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
+ case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
+ case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
+ case ARMISD::SMULWB: return "ARMISD::SMULWB";
+ case ARMISD::SMULWT: return "ARMISD::SMULWT";
+ case ARMISD::SMLALD: return "ARMISD::SMLALD";
+ case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
+ case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
+ case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
@@ -1414,6 +1453,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Lowering Code
//===----------------------------------------------------------------------===//
+static bool isSRL16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SRL)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+static bool isSRA16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SRA)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+static bool isSHL16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SHL)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+// Check for a signed 16-bit value. We special case SRA because it makes it
+// more simple when also looking for SRAs that aren't sign extending a
+// smaller value. Without the check, we'd need to take extra care with
+// checking order for some operations.
+static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
+ if (isSRA16(Op))
+ return isSHL16(Op.getOperand(0));
+ return DAG.ComputeNumSignBits(Op) == 17;
+}
+
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
@@ -1433,22 +1506,34 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
- ARMCC::CondCodes &CondCode2) {
+ ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
CondCode2 = ARMCC::AL;
+ InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
- case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETOEQ:
+ CondCode = ARMCC::EQ;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
- case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETONE:
+ CondCode = ARMCC::MI;
+ CondCode2 = ARMCC::GT;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
- case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUEQ:
+ CondCode = ARMCC::EQ;
+ CondCode2 = ARMCC::VS;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
@@ -1456,7 +1541,10 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
- case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ case ISD::SETUNE:
+ CondCode = ARMCC::NE;
+ InvalidOnQNaN = false;
+ break;
}
}
@@ -1549,8 +1637,8 @@ SDValue ARMTargetLowering::LowerCallResult(
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
@@ -1710,8 +1798,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
@@ -1724,8 +1812,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(NumBytes, dl, true), dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@@ -2088,10 +2175,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
- assert((State->getCallOrPrologue() == Prologue ||
- State->getCallOrPrologue() == Call) &&
- "unhandled ParmContext");
-
// Byval (as with any stack) slots are always at least 4 byte aligned.
Align = std::max(Align, 4U);
@@ -2148,7 +2231,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = INT_MAX;
+ int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
@@ -2178,7 +2261,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
} else
return false;
- assert(FI != INT_MAX);
+ assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
@@ -2260,7 +2343,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
+ CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
@@ -2362,8 +2445,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
@@ -2550,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
@@ -2586,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
-static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
+
+ // When generating execute-only code Constant Pools must be promoted to the
+ // global data section. It's a bit ugly that we can't share them across basic
+ // blocks, but this way we guarantee that execute-only behaves correct with
+ // position-independent addressing modes.
+ if (Subtarget->genExecuteOnly()) {
+ auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ auto T = const_cast<Type*>(CP->getType());
+ auto C = const_cast<Constant*>(CP->getConstVal());
+ auto M = const_cast<Module*>(DAG.getMachineFunction().
+ getFunction()->getParent());
+ auto GV = new GlobalVariable(
+ *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+ Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
+ Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
+ Twine(AFI->createPICLabelUId())
+ );
+ SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
+ dl, PtrVT);
+ return LowerGlobalAddress(GA, DAG);
+ }
+
if (CP->isMachineConstantPoolEntry())
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
CP->getAlignment());
@@ -2790,9 +2896,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
// FIXME: is there useful debug info available here?
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2935,7 +3041,7 @@ static bool isSimpleType(Type *T) {
}
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
- EVT PtrVT, SDLoc dl) {
+ EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
// to save ourselves an indirection.
@@ -2980,7 +3086,8 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
- if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
+ if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
+ Size == 0)
return SDValue();
unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
@@ -3034,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) {
isa<Function>(GV);
}
+SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
+}
+
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3080,15 +3200,22 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
return Result;
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue G = DAG.getLoad(
- PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ SDValue RelAddr;
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ ++NumMovwMovt;
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
+ RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
+ } else { // use literal pool for address constant
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ RelAddr = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
- SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
return Result;
}
@@ -3219,6 +3346,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
return Result;
}
+ case Intrinsic::arm_neon_vabs:
+ return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
@@ -3256,13 +3386,23 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vtbl1:
+ return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::arm_neon_vtbl2:
+ return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- // FIXME: handle "fence singlethread" more efficiently.
SDLoc dl(Op);
+ ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
+ auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
+ if (SSID == SyncScope::SingleThread)
+ return Op;
+
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
@@ -3462,8 +3602,8 @@ SDValue ARMTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Prologue);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
@@ -3595,7 +3735,6 @@ SDValue ARMTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
-
// sanity check
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
@@ -3734,13 +3873,15 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl) const {
+ SelectionDAG &DAG, const SDLoc &dl,
+ bool InvalidOnQNaN) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
+ SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
@@ -3757,10 +3898,12 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+ Cmp.getOperand(1), Cmp.getOperand(2));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+ Cmp.getOperand(1));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
@@ -3808,7 +3951,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
return std::make_pair(Value, OverflowCmp);
}
-
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
@@ -3832,7 +3974,6 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
-
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
@@ -4025,7 +4166,6 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
// Additionally, the variable is returned in parameter V and the constant in K.
static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
uint64_t &K) {
-
SDValue LHS1 = Op.getOperand(0);
SDValue RHS1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
@@ -4046,10 +4186,10 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
// in each conditional
SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
? &RHS1
- : NULL;
+ : nullptr;
SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
? &RHS2
- : NULL;
+ : nullptr;
SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
@@ -4073,13 +4213,15 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
const SDValue *LowerCheckOp =
isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
- : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
- : NULL;
+ : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+ ? &Op2
+ : nullptr;
const SDValue *UpperCheckOp =
isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
- : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
- : NULL;
+ : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+ ? &Op2
+ : nullptr;
if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
return false;
@@ -4104,7 +4246,6 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -4162,7 +4303,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
@@ -4181,13 +4323,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
@@ -4348,10 +4490,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
@@ -4853,9 +4996,10 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
- SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
- DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
- MVT::i32));
+ SDValue Ops[] = { DAG.getEntryNode(),
+ DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
+
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
@@ -5212,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
- case ISD::SETNE: Invert = true;
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
- case ISD::SETLT: Swap = true;
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break;
- case ISD::SETLE: Swap = true;
+ case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGE: Opc = ARMISD::VCGE; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
}
@@ -5584,7 +5728,6 @@ static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
return true;
}
-
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
@@ -5758,7 +5901,10 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
return false;
@@ -5789,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
for (unsigned k = 0; k < Half; ++k) {
@@ -5829,7 +5978,10 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@@ -5862,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@@ -6027,10 +6182,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (ValueCounts.size() != 1)
usesOnlyOneValue = false;
- if (!Value.getNode() && ValueCounts.size() > 0)
+ if (!Value.getNode() && !ValueCounts.empty())
Value = ValueCounts.begin()->first;
- if (ValueCounts.size() == 0)
+ if (ValueCounts.empty())
return DAG.getUNDEF(VT);
// Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
@@ -6182,8 +6337,8 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
struct ShuffleSourceInfo {
SDValue Vec;
- unsigned MinElt;
- unsigned MaxElt;
+ unsigned MinElt = std::numeric_limits<unsigned>::max();
+ unsigned MaxElt = 0;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
@@ -6192,13 +6347,12 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Code should guarantee that element i in Vec starts at element "WindowBase
// + i * WindowScale in ShuffleVec".
- int WindowBase;
- int WindowScale;
+ int WindowBase = 0;
+ int WindowScale = 1;
+
+ ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
- ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
- WindowScale(1) {}
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -6220,7 +6374,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- auto Source = find(Sources, SourceVec);
+ auto Source = llvm::find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
@@ -6336,7 +6490,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
if (Entry.isUndef())
continue;
- auto Src = find(Sources, Entry.getOperand(0));
+ auto Src = llvm::find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
@@ -6633,7 +6787,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
- assert(all_of(ShuffleMask, [&](int i) {
+ assert(llvm::all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
@@ -6896,8 +7050,19 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
N->getValueType(0),
N->getOpcode());
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
- return SkipLoadExtensionForVMULL(LD, DAG);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
+ "Expected extending load");
+
+ SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
+ unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue extLoad =
+ DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
+
+ return newLoad;
+ }
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
@@ -7242,7 +7407,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
+ Type *RetTy = StructType::get(ArgTy, ArgTy);
auto &DL = DAG.getDataLayout();
ArgListTy Args;
@@ -7258,9 +7423,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = SRet;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isSRet = true;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsSRet = true;
Args.push_back(Entry);
RetTy = Type::getVoidTy(*DAG.getContext());
}
@@ -7268,8 +7433,8 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
@@ -7427,6 +7592,9 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
dl, MVT::i32);
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+ if (isBigEndian)
+ std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
@@ -7454,10 +7622,14 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
- Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
- SDValue(CmpSwap, 0)));
- Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
- SDValue(CmpSwap, 0)));
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+
+ Results.push_back(
+ DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
+ Results.push_back(
+ DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(SDValue(CmpSwap, 2));
}
@@ -7480,12 +7652,12 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
Entry.Node = Val;
Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isZExt = true;
+ Entry.IsZExt = true;
Args.push_back(Entry);
Entry.Node = Exponent;
Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isZExt = true;
+ Entry.IsZExt = true;
Args.push_back(Entry);
Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
@@ -7517,21 +7689,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
- case ISD::ConstantPool:
- if (Subtarget->genExecuteOnly())
- llvm_unreachable("execute-only should not generate constant pools");
- return LowerConstantPool(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::GlobalAddress:
- switch (Subtarget->getTargetTriple().getObjectFormat()) {
- default: llvm_unreachable("unknown object format");
- case Triple::COFF:
- return LowerGlobalAddressWindows(Op, DAG);
- case Triple::ELF:
- return LowerGlobalAddressELF(Op, DAG);
- case Triple::MachO:
- return LowerGlobalAddressDarwin(Op, DAG);
- }
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -7607,6 +7767,37 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Opc = 0;
+ if (IntNo == Intrinsic::arm_smlald)
+ Opc = ARMISD::SMLALD;
+ else if (IntNo == Intrinsic::arm_smlaldx)
+ Opc = ARMISD::SMLALDX;
+ else if (IntNo == Intrinsic::arm_smlsld)
+ Opc = ARMISD::SMLSLD;
+ else if (IntNo == Intrinsic::arm_smlsldx)
+ Opc = ARMISD::SMLSLDX;
+ else
+ return;
+
+ SDLoc dl(N);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(3),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(3),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ SDValue LongMul = DAG.getNode(Opc, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ N->getOperand(1), N->getOperand(2),
+ Lo, Hi);
+ Results.push_back(LongMul.getValue(0));
+ Results.push_back(LongMul.getValue(1));
+}
+
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
@@ -7648,6 +7839,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_64Results(N, Results, DAG);
return;
+ case ISD::INTRINSIC_WO_CHAIN:
+ return ReplaceLongIntrinsic(N, Results, DAG);
}
if (Res.getNode())
Results.push_back(Res);
@@ -7702,24 +7895,27 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// add r5, pc
// str r5, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
// Set the low bit because of thumb mode.
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
- .addReg(NewVReg1, RegState::Kill)
- .addImm(0x01)));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
.addReg(NewVReg2, RegState::Kill)
.addImm(PCLabelId);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
- .addReg(NewVReg3, RegState::Kill)
- .addFrameIndex(FI)
- .addImm(36) // &jbuf[1] :: pc
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
} else if (isThumb) {
// Incoming value: jbuf
// ldr.n r1, LCPI1_4
@@ -7729,51 +7925,58 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// add r2, $jbuf, #+4 ; &jbuf[1]
// str r1, [r2]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId);
// Set the low bit because of thumb mode.
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
- .addReg(ARM::CPSR, RegState::Define)
- .addImm(1));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3, RegState::Kill));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
- .addReg(NewVReg4, RegState::Kill)
- .addReg(NewVReg5, RegState::Kill)
- .addImm(0)
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
} else {
// Incoming value: jbuf
// ldr r1, LCPI1_1
// add r1, pc, r1
// str r1, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addImm(0)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
- .addReg(NewVReg1, RegState::Kill)
- .addImm(PCLabelId));
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
- .addReg(NewVReg2, RegState::Kill)
- .addFrameIndex(FI)
- .addImm(36) // &jbuf[1] :: pc
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
}
}
@@ -7791,7 +7994,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
- DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
@@ -7886,31 +8089,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size())
+ .add(predOps(ARMCC::AL));
} else {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(NumLPads & 0xFFFF));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF)
+ .add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
- .addReg(VReg1)
- .addImm(NumLPads >> 16));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16)
+ .add(predOps(ARMCC::AL));
}
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg2));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
@@ -7919,16 +8127,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
.addReg(NewVReg4, RegState::Kill)
@@ -7936,15 +8145,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
- .addFrameIndex(FI)
- .addImm(1)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
- .addReg(NewVReg1)
- .addImm(NumLPads));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -7957,12 +8168,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
- .addReg(VReg1, RegState::Define)
- .addConstantPoolIndex(Idx));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
- .addReg(NewVReg1)
- .addReg(VReg1));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
@@ -7971,37 +8184,42 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg1)
- .addImm(2));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3));
+ BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3)
+ .add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
- .addReg(NewVReg4, RegState::Kill)
- .addImm(0)
- .addMemOperand(JTMMOLd));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg6 = NewVReg5;
if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg3));
+ BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
@@ -8009,31 +8227,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI);
} else {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
- .addReg(NewVReg1)
- .addImm(NumLPads));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads)
+ .add(predOps(ARMCC::AL));
} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
- .addImm(NumLPads & 0xFFFF));
+ BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF)
+ .add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
- .addReg(VReg1)
- .addImm(NumLPads >> 16));
+ BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16)
+ .add(predOps(ARMCC::AL));
}
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg2));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -8046,13 +8269,15 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
- .addReg(VReg1, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg1, RegState::Kill));
+ BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
@@ -8061,23 +8286,25 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
- .addReg(NewVReg1)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
- .addReg(NewVReg3, RegState::Kill)
- .addReg(NewVReg4)
- .addImm(0)
- .addMemOperand(JTMMOLd));
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
+ .addImm(0)
+ .addMemOperand(JTMMOLd)
+ .add(predOps(ARMCC::AL));
if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
@@ -8222,26 +8449,35 @@ static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
assert(LdOpc != 0 && "Should have a load opcode");
if (LdSize >= 8) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addImm(0));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// load + update AddrIn
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrIn).addImm(0));
- MachineInstrBuilder MIB =
- BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(AddrIn).addImm(LdSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
+ .add(t1CondCodeOp())
+ .addReg(AddrIn)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb2) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addImm(LdSize));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
} else { // arm
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addReg(0).addImm(LdSize));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addReg(0)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
}
}
@@ -8254,24 +8490,36 @@ static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
assert(StOpc != 0 && "Should have a store opcode");
if (StSize >= 8) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(AddrIn).addImm(0).addReg(Data));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn)
+ .addImm(0)
+ .addReg(Data)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// store + update AddrIn
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
- .addReg(AddrIn).addImm(0));
- MachineInstrBuilder MIB =
- BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(AddrIn).addImm(StSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, Pos, dl, TII->get(StOpc))
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
+ .add(t1CondCodeOp())
+ .addReg(AddrIn)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb2) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(Data).addReg(AddrIn).addImm(StSize));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
} else { // arm
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(Data).addReg(AddrIn).addReg(0)
- .addImm(StSize));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addReg(0)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
}
}
@@ -8402,16 +8650,15 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
- Vtmp).addImm(LoopSize & 0xFFFF));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF)
+ .add(predOps(ARMCC::AL));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
- varEnd)
- .addReg(Vtmp)
- .addImm(LoopSize >> 16));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -8424,11 +8671,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb)
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
- varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL));
else
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
- varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
+ BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
}
BB->addSuccessor(loopMBB);
@@ -8465,16 +8717,19 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Decrement loop variable by UnitSize.
if (IsThumb1) {
- MachineInstrBuilder MIB =
- BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(varPhi).addImm(UnitSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
+ .add(t1CondCodeOp())
+ .addReg(varPhi)
+ .addImm(UnitSize)
+ .add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB.addReg(varPhi)
+ .addImm(UnitSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
}
@@ -8545,11 +8800,14 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__chkstk")
- .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Define)
- .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ .add(predOps(ARMCC::AL))
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(ARM::CPSR,
+ RegState::Implicit | RegState::Define | RegState::Dead);
break;
case CodeModel::Large:
case CodeModel::JITDefault: {
@@ -8559,20 +8817,24 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(Reg, RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Define)
- .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ .add(predOps(ARMCC::AL))
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(ARM::CPSR,
+ RegState::Implicit | RegState::Define | RegState::Dead);
break;
}
}
- AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
- ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)));
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
MI.eraseFromParent();
return MBB;
@@ -8597,9 +8859,10 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MF->push_back(TrapBB);
MBB->addSuccessor(TrapBB);
- AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
- .addReg(MI.getOperand(0).getReg())
- .addImm(0));
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
+ .addReg(MI.getOperand(0).getReg())
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::EQ)
@@ -8617,18 +8880,18 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool isThumb2 = Subtarget->isThumb2();
switch (MI.getOpcode()) {
default: {
- MI.dump();
+ MI.print(errs());
llvm_unreachable("Unexpected instr type to insert");
}
// Thumb1 post-indexed loads are really just single-register LDMs.
case ARM::tLDR_postidx: {
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
- .addOperand(MI.getOperand(1)) // Rn_wb
- .addOperand(MI.getOperand(2)) // Rn
- .addOperand(MI.getOperand(3)) // PredImm
- .addOperand(MI.getOperand(4)) // PredReg
- .addOperand(MI.getOperand(0)); // Rt
+ .add(MI.getOperand(1)) // Rn_wb
+ .add(MI.getOperand(2)) // Rn
+ .add(MI.getOperand(3)) // PredImm
+ .add(MI.getOperand(4)) // PredReg
+ .add(MI.getOperand(0)); // Rt
MI.eraseFromParent();
return BB;
}
@@ -8659,12 +8922,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
- .addOperand(MI.getOperand(0)) // Rn_wb
- .addOperand(MI.getOperand(1)) // Rt
- .addOperand(MI.getOperand(2)) // Rn
- .addImm(Offset) // offset (skip GPR==zero_reg)
- .addOperand(MI.getOperand(5)) // pred
- .addOperand(MI.getOperand(6))
+ .add(MI.getOperand(0)) // Rn_wb
+ .add(MI.getOperand(1)) // Rt
+ .add(MI.getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .add(MI.getOperand(5)) // pred
+ .add(MI.getOperand(6))
.addMemOperand(MMO);
MI.eraseFromParent();
return BB;
@@ -8681,7 +8944,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
for (unsigned i = 0; i < MI.getNumOperands(); ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MI.eraseFromParent();
return BB;
}
@@ -8754,18 +9017,20 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned LHS1 = MI.getOperand(1).getReg();
unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
unsigned RHS1 = MI.getOperand(3).getReg();
unsigned RHS2 = MI.getOperand(4).getReg();
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1)
+ .addReg(RHS1)
+ .add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS2).addReg(RHS2)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
@@ -8779,7 +9044,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
if (isThumb2)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ BuildMI(BB, dl, TII->get(ARM::t2B))
+ .addMBB(exitMBB)
+ .add(predOps(ARMCC::AL));
else
BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
@@ -8842,9 +9109,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
RSBBB->addSuccessor(SinkBB);
// insert a cmp at the end of BB
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(ABSSrcReg).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -8855,9 +9123,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Note: BCC and rsbri will be converted into predicated rsbmi
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
- .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
// insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
@@ -8927,19 +9197,45 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
+ unsigned ccOutIdx;
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
- assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
- "converted opcode should be the same except for cc_out");
+ assert(MCID->getNumOperands() ==
+ MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
+ && "converted opcode should be the same except for cc_out"
+ " (and, on Thumb1, pred)");
MI.setDesc(*MCID);
// Add the optional cc_out operand
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
- }
- unsigned ccOutIdx = MCID->getNumOperands() - 1;
+
+ // On Thumb1, move all input operands to the end, then add the predicate
+ if (Subtarget->isThumb1Only()) {
+ for (unsigned c = MCID->getNumOperands() - 4; c--;) {
+ MI.addOperand(MI.getOperand(1));
+ MI.RemoveOperand(1);
+ }
+
+ // Restore the ties
+ for (unsigned i = MI.getNumOperands(); i--;) {
+ const MachineOperand& op = MI.getOperand(i);
+ if (op.isReg() && op.isUse()) {
+ int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ MI.tieOperands(DefIdx, i);
+ }
+ }
+
+ MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
+ ccOutIdx = 1;
+ } else
+ ccOutIdx = MCID->getNumOperands() - 1;
+ } else
+ ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
@@ -8970,7 +9266,9 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (deadCPSR) {
assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
- return;
+ // Thumb1 instructions must have the S bit even if the CPSR is dead.
+ if (!Subtarget->isThumb1Only())
+ return;
}
// If this instruction was defined with an optional CPSR def and its dag node
@@ -9032,7 +9330,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
SDLoc dl(N);
EVT VT = N->getValueType(0);
CC = N->getOperand(0);
- if (CC.getValueType() != MVT::i1)
+ if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
return false;
Invert = !AllOnes;
if (AllOnes)
@@ -9265,8 +9563,11 @@ AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
- // Don't generate vpaddl+vmovn; we'll match it to vpadd later.
- if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
+ // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
+ // we're using the entire input vector, otherwise there's a size/legality
+ // mismatch somewhere.
+ if (nextIndex != Vec.getValueType().getVectorNumElements() ||
+ Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
return SDValue();
// Create VPADDL node.
@@ -9308,10 +9609,90 @@ static SDValue findMUL_LOHI(SDValue V) {
return SDValue();
}
-static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasDSP())
+ return SDValue();
+ } else if (!Subtarget->hasV5TEOps())
+ return SDValue();
+
+ // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
+ // accumulates the product into a 64-bit value. The 16-bit values will
+ // be sign extended somehow or SRA'd into 32-bit values
+ // (addc (adde (mul 16bit, 16bit), lo), hi)
+ SDValue Mul = AddcNode->getOperand(0);
+ SDValue Lo = AddcNode->getOperand(1);
+ if (Mul.getOpcode() != ISD::MUL) {
+ Lo = AddcNode->getOperand(0);
+ Mul = AddcNode->getOperand(1);
+ if (Mul.getOpcode() != ISD::MUL)
+ return SDValue();
+ }
+
+ SDValue SRA = AddeNode->getOperand(0);
+ SDValue Hi = AddeNode->getOperand(1);
+ if (SRA.getOpcode() != ISD::SRA) {
+ SRA = AddeNode->getOperand(1);
+ Hi = AddeNode->getOperand(0);
+ if (SRA.getOpcode() != ISD::SRA)
+ return SDValue();
+ }
+ if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
+ if (Const->getZExtValue() != 31)
+ return SDValue();
+ } else
+ return SDValue();
+
+ if (SRA.getOperand(0) != Mul)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(AddcNode);
+ unsigned Opcode = 0;
+ SDValue Op0;
+ SDValue Op1;
+
+ if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
+ Opcode = ARMISD::SMLALBB;
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1);
+ } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
+ Opcode = ARMISD::SMLALBT;
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1).getOperand(0);
+ } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
+ Opcode = ARMISD::SMLALTB;
+ Op0 = Mul.getOperand(0).getOperand(0);
+ Op1 = Mul.getOperand(1);
+ } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
+ Opcode = ARMISD::SMLALTT;
+ Op0 = Mul->getOperand(0).getOperand(0);
+ Op1 = Mul->getOperand(1).getOperand(0);
+ }
+
+ if (!Op0 || !Op1)
+ return SDValue();
+
+ SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ Op0, Op1, Lo, Hi);
+ // Replace the ADDs' nodes uses by the MLA node's values.
+ SDValue HiMLALResult(SMLAL.getNode(), 1);
+ SDValue LoMLALResult(SMLAL.getNode(), 0);
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
// Look for multiply add opportunities.
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
@@ -9326,7 +9707,17 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// \ /
// ADDC <- hiAdd
//
- assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::i32 &&
+ "ADDE node has the wrong inputs");
+
+ // Check that we have a glued ADDC node.
+ SDNode* AddcNode = AddeNode->getOperand(2).getNode();
+ if (AddcNode->getOpcode() != ARMISD::ADDC)
+ return SDValue();
+
SDValue AddcOp0 = AddcNode->getOperand(0);
SDValue AddcOp1 = AddcNode->getOperand(1);
@@ -9338,29 +9729,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
AddcNode->getValueType(0) == MVT::i32 &&
"Expect ADDC with two result values. First: i32");
- // Check that we have a glued ADDC node.
- if (AddcNode->getValueType(1) != MVT::Glue)
- return SDValue();
-
- // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+ // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
+ // maybe a SMLAL which multiplies two 16-bit values.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
- return SDValue();
-
- // Look for the glued ADDE.
- SDNode* AddeNode = AddcNode->getGluedUser();
- if (!AddeNode)
- return SDValue();
-
- // Make sure it is really an ADDE.
- if (AddeNode->getOpcode() != ISD::ADDE)
- return SDValue();
-
- assert(AddeNode->getNumOperands() == 3 &&
- AddeNode->getOperand(2).getValueType() == MVT::Glue &&
- "ADDE node has the wrong inputs");
+ return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
// Check for the triangle shape.
SDValue AddeOp0 = AddeNode->getOperand(0);
@@ -9435,38 +9810,25 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
// Return original node to notify the driver to stop replacing.
- SDValue resNode(AddcNode, 0);
- return resNode;
+ return SDValue(AddeNode, 0);
}
-static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// UMAAL is similar to UMLAL except that it adds two unsigned values.
// While trying to combine for the other MLAL nodes, first search for the
- // chance to use UMAAL. Check if Addc uses another addc node which can first
- // be combined into a UMLAL. The other pattern is AddcNode being combined
- // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
-
- if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
- (Subtarget->isThumb() && !Subtarget->hasThumb2()))
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
-
- SDNode *PrevAddc = nullptr;
- if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
- PrevAddc = AddcNode->getOperand(0).getNode();
- else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
- PrevAddc = AddcNode->getOperand(1).getNode();
-
- // If there's no addc chains, just return a search for any MLAL.
- if (PrevAddc == nullptr)
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
-
- // Try to convert the addc operand to an MLAL and if that fails try to
- // combine AddcNode.
- SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
- if (MLAL != SDValue(PrevAddc, 0))
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+ // chance to use UMAAL. Check if Addc uses a node which has already
+ // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
+ // as the addend, and it's handled in PerformUMLALCombine.
+
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
+
+ // Check that we have a glued ADDC node.
+ SDNode* AddcNode = AddeNode->getOperand(2).getNode();
+ if (AddcNode->getOpcode() != ARMISD::ADDC)
+ return SDValue();
// Find the converted UMAAL or quit if it doesn't exist.
SDNode *UmlalNode = nullptr;
@@ -9478,29 +9840,18 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
UmlalNode = AddcNode->getOperand(1).getNode();
AddHi = AddcNode->getOperand(0);
} else {
- return SDValue();
+ return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
}
// The ADDC should be glued to an ADDE node, which uses the same UMLAL as
// the ADDC as well as Zero.
- auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
-
- if (!Zero || Zero->getZExtValue() != 0)
- return SDValue();
-
- // Check that we have a glued ADDC node.
- if (AddcNode->getValueType(1) != MVT::Glue)
- return SDValue();
-
- // Look for the glued ADDE.
- SDNode* AddeNode = AddcNode->getGluedUser();
- if (!AddeNode)
+ if (!isNullConstant(UmlalNode->getOperand(3)))
return SDValue();
- if ((AddeNode->getOperand(0).getNode() == Zero &&
+ if ((isNullConstant(AddeNode->getOperand(0)) &&
AddeNode->getOperand(1).getNode() == UmlalNode) ||
(AddeNode->getOperand(0).getNode() == UmlalNode &&
- AddeNode->getOperand(1).getNode() == Zero)) {
+ isNullConstant(AddeNode->getOperand(1)))) {
SelectionDAG &DAG = DCI.DAG;
SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
@@ -9513,19 +9864,84 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
// Return original node to notify the driver to stop replacing.
- return SDValue(AddcNode, 0);
+ return SDValue(AddeNode, 0);
}
return SDValue();
}
-/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
-/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
-static SDValue PerformADDCCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
+static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ return SDValue();
+
+ // Check that we have a pair of ADDC and ADDE as operands.
+ // Both addends of the ADDE must be zero.
+ SDNode* AddcNode = N->getOperand(2).getNode();
+ SDNode* AddeNode = N->getOperand(3).getNode();
+ if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
+ (AddeNode->getOpcode() == ARMISD::ADDE) &&
+ isNullConstant(AddeNode->getOperand(0)) &&
+ isNullConstant(AddeNode->getOperand(1)) &&
+ (AddeNode->getOperand(2).getNode() == AddcNode))
+ return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::i32),
+ {N->getOperand(0), N->getOperand(1),
+ AddcNode->getOperand(0), AddcNode->getOperand(1)});
+ else
+ return SDValue();
+}
+
+static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (Subtarget->isThumb1Only()) {
+ SDValue RHS = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int32_t imm = C->getSExtValue();
+ if (imm < 0 && imm > INT_MIN) {
+ SDLoc DL(N);
+ RHS = DAG.getConstant(-imm, DL, MVT::i32);
+ unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
+ : ARMISD::ADDC;
+ return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
+ }
+ }
+ }
+ return SDValue();
+}
- if (Subtarget->isThumb1Only()) return SDValue();
+static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (Subtarget->isThumb1Only()) {
+ SDValue RHS = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t imm = C->getSExtValue();
+ if (imm < 0) {
+ SDLoc DL(N);
+
+ // The with-carry-in form matches bitwise not instead of the negation.
+ // Effectively, the inverse interpretation of the carry flag already
+ // accounts for part of the negation.
+ RHS = DAG.getConstant(~imm, DL, MVT::i32);
+
+ unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
+ : ARMISD::ADDE;
+ return DAG.getNode(Opcode, DL, N->getVTList(),
+ N->getOperand(0), RHS, N->getOperand(2));
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// PerformADDECombine - Target-specific dag combine transform from
+/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
+/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
+static SDValue PerformADDECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Only ARM and Thumb2 support UMLAL/SMLAL.
+ if (Subtarget->isThumb1Only())
+ return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
@@ -9722,7 +10138,6 @@ static SDValue PerformMULCombine(SDNode *N,
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
@@ -9761,6 +10176,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+// Try combining OR nodes to SMULWB, SMULWT.
+static SDValue PerformORCombineToSMULWBT(SDNode *OR,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasV6Ops() ||
+ (Subtarget->isThumb() &&
+ (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
+ return SDValue();
+
+ SDValue SRL = OR->getOperand(0);
+ SDValue SHL = OR->getOperand(1);
+
+ if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
+ SRL = OR->getOperand(1);
+ SHL = OR->getOperand(0);
+ }
+ if (!isSRL16(SRL) || !isSHL16(SHL))
+ return SDValue();
+
+ // The first operands to the shifts need to be the two results from the
+ // same smul_lohi node.
+ if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+ SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+ if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+ SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+ return SDValue();
+
+ // Now we have:
+ // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
+ // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
+ // For SMUWB the 16-bit value will signed extended somehow.
+ // For SMULWT only the SRA is required.
+ // Check both sides of SMUL_LOHI
+ SDValue OpS16 = SMULLOHI->getOperand(0);
+ SDValue OpS32 = SMULLOHI->getOperand(1);
+
+ SelectionDAG &DAG = DCI.DAG;
+ if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
+ OpS16 = OpS32;
+ OpS32 = SMULLOHI->getOperand(0);
+ }
+
+ SDLoc dl(OR);
+ unsigned Opcode = 0;
+ if (isS16(OpS16, DAG))
+ Opcode = ARMISD::SMULWB;
+ else if (isSRA16(OpS16)) {
+ Opcode = ARMISD::SMULWT;
+ OpS16 = OpS16->getOperand(0);
+ }
+ else
+ return SDValue();
+
+ SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
+ return SDValue(OR, 0);
+}
+
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -9798,6 +10274,8 @@ static SDValue PerformORCombine(SDNode *N,
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
+ if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+ return Result;
}
// The code below optimizes (or (and X, Y), Z).
@@ -9906,7 +10384,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
- if (Subtarget->hasT2ExtractPack() &&
+ if (Subtarget->hasDSP() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
@@ -9922,7 +10400,7 @@ static SDValue PerformORCombine(SDNode *N,
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
- if (Subtarget->hasT2ExtractPack() &&
+ if (Subtarget->hasDSP() &&
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
@@ -10485,11 +10963,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
- if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
- uint64_t IncVal = CInc->getZExtValue();
- if (IncVal != NumBytes)
- continue;
- } else if (NumBytes >= 3 * 16) {
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
// separate instructions that make it harder to use a non-constant update.
continue;
@@ -11306,34 +11781,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
- APInt &KnownOne) {
- if (Op.getOpcode() == ARMISD::BFI) {
- // Conservatively, we can recurse down the first operand
- // and just mask out all affected bits.
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
-
- // The operand to BFI is already a mask suitable for removing the bits it
- // sets.
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- const APInt &Mask = CI->getAPIntValue();
- KnownZero &= Mask;
- KnownOne &= Mask;
- return;
- }
- if (Op.getOpcode() == ARMISD::CMOV) {
- APInt KZ2(KnownZero.getBitWidth(), 0);
- APInt KO2(KnownOne.getBitWidth(), 0);
- computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
- computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
-
- KnownZero &= KZ2;
- KnownOne &= KO2;
- return;
- }
- return DAG.computeKnownBits(Op, KnownZero, KnownOne);
-}
-
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
@@ -11394,9 +11841,9 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
- APInt KnownZero, KnownOne;
- computeKnownBits(DAG, Y, KnownZero, KnownOne);
- if ((OrCI & KnownZero) != OrCI)
+ KnownBits Known;
+ DAG.computeKnownBits(Y, Known);
+ if ((OrCI & Known.Zero) != OrCI)
return SDValue();
// OK, we can do the combine.
@@ -11534,16 +11981,16 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
}
if (Res.getNode()) {
- APInt KnownZero, KnownOne;
- DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
+ KnownBits Known;
+ DAG.computeKnownBits(SDValue(N,0), Known);
// Capture demanded bits information that would be otherwise lost.
- if (KnownZero == 0xfffffffe)
+ if (Known.Zero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i1));
- else if (KnownZero == 0xffffff00)
+ else if (Known.Zero == 0xffffff00)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i8));
- else if (KnownZero == 0xffff0000)
+ else if (Known.Zero == 0xffff0000)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i16));
}
@@ -11555,13 +12002,17 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
+ case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
+ case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
+ case ARMISD::ADDC:
+ case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
@@ -11593,6 +12044,56 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ARMISD::SMULWB: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMULWT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALBB: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALBT: {
+ unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
+ APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
+ unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
+ APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALTB: {
+ unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
+ APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
+ unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
+ APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALTT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -11688,12 +12189,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
- // Lowering to i32/i16 if the size permits.
- if (Size >= 4)
- return MVT::i32;
- else if (Size >= 2)
- return MVT::i16;
-
// Let the target-independent logic figure it out.
return MVT::Other;
}
@@ -12178,12 +12673,12 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
}
void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
- unsigned BitWidth = KnownOne.getBitWidth();
- KnownZero = KnownOne = APInt(BitWidth, 0);
+ unsigned BitWidth = Known.getBitWidth();
+ Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case ARMISD::ADDC:
@@ -12193,17 +12688,18 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// These nodes' second result is a boolean
if (Op.getResNo() == 0)
break;
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- if (KnownZero == 0 && KnownOne == 0) return;
+ DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ if (Known.isUnknown())
+ return;
- APInt KnownZeroRHS, KnownOneRHS;
- DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
- KnownZero &= KnownZeroRHS;
- KnownOne &= KnownOneRHS;
+ KnownBits KnownRHS;
+ DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
+ Known.Zero &= KnownRHS.Zero;
+ Known.One &= KnownRHS.One;
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -12215,11 +12711,24 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::arm_ldrex: {
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
}
+ case ARMISD::BFI: {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ const APInt &Mask = CI->getAPIntValue();
+ Known.Zero &= Mask;
+ Known.One &= Mask;
+ return;
+ }
}
}
@@ -12588,8 +13097,8 @@ static TargetLowering::ArgListTy getDivRemArgList(
Type *ArgTy = ArgVT.getTypeForEVT(*Context);
Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
if (Subtarget->isTargetWindows() && Args.size() >= 2)
@@ -12615,7 +13124,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
- if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
+ : Subtarget->hasDivideInARMMode();
+ if (hasDivide && Op->getValueType(0).isSimple() &&
Op->getSimpleValueType(0) == MVT::i32) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
@@ -12639,7 +13150,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
- Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
+ Type *RetTy = StructType::get(Ty, Ty);
if (Subtarget->isTargetWindows())
InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
@@ -12861,7 +13372,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::arm_stlexd:
- case Intrinsic::arm_strexd: {
+ case Intrinsic::arm_strexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(2);
@@ -12871,9 +13382,9 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
- }
+
case Intrinsic::arm_ldaexd:
- case Intrinsic::arm_ldrexd: {
+ case Intrinsic::arm_ldrexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
@@ -12883,7 +13394,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
- }
+
default:
break;
}
@@ -12921,7 +13432,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
- Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
+ Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
@@ -12932,7 +13443,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
- Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain);
@@ -12941,9 +13452,9 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
}
// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord, bool IsStore,
- bool IsLoad) const {
+Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
@@ -12952,7 +13463,7 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
case AtomicOrdering::Acquire:
return nullptr; // Nothing to do
case AtomicOrdering::SequentiallyConsistent:
- if (!IsStore)
+ if (!Inst->hasAtomicStore())
return nullptr; // Nothing to do
/*FALLTHROUGH*/
case AtomicOrdering::Release:
@@ -12966,9 +13477,9 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}
-Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord, bool IsStore,
- bool IsLoad) const {
+Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
@@ -13089,7 +13600,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+ Function *Ldrex = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
@@ -13106,7 +13617,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldrex, Addr),
@@ -13118,7 +13629,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
if (!Subtarget->hasV7Ops())
return;
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
}
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
@@ -13154,6 +13665,39 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+/// A helper function for determining the number of interleaved accesses we
+/// will generate when lowering accesses of the given type.
+unsigned
+ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
+ return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+}
+
+bool ARMTargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the vector doesn't have f16 elements. Even though we could do an
+ // i16 vldN, we can't hold the f16 vectors and will end up converting via
+ // f32.
+ if (VecTy->getElementType()->isHalfTy())
+ return false;
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32)
+ return false;
+
+ // Ensure the total vector size is 64 or a multiple of 128. Types larger than
+ // 128 will be split into multiple interleaved accesses.
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -13178,64 +13722,99 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vldN doesn't support i64/f64 elements).
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // the vector types are divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
+ unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ IRBuilder<> Builder(LI);
+
+ // The base address of the load.
+ Value *BaseAddr = LI->getPointerOperand();
+
+ if (NumLoads > 1) {
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ VecTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / NumLoads);
+
+ // We will compute the pointer operand of each load from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace()));
+ }
+
+ assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, Int8Ptr};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- IRBuilder<> Builder(LI);
- SmallVector<Value *, 2> Ops;
+ // Holds sub-vectors extracted from the load intrinsic return values. The
+ // sub-vectors are associated with the shufflevector instructions they will
+ // replace.
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
- Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
- Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
- Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
- Type *Tys[] = { VecTy, Int8Ptr };
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+ // If we're generating more than one load, compute the base address of
+ // subsequent loads as an offset from the previous.
+ if (LoadCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(
+ BaseAddr, VecTy->getVectorNumElements() * Factor);
- // Replace uses of each shufflevector with the corresponding vector loaded
- // by ldN.
- for (unsigned i = 0; i < Shuffles.size(); i++) {
- ShuffleVectorInst *SV = Shuffles[i];
- unsigned Index = Indices[i];
+ SmallVector<Value *, 2> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ Ops.push_back(Builder.getInt32(LI->getAlignment()));
- Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+ CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
- // Convert the integer vector to pointer vector if the element is pointer.
- if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SV = Shuffles[i];
+ unsigned Index = Indices[i];
- SV->replaceAllUsesWith(SubVec);
- }
+ Value *SubVec = Builder.CreateExtractValue(VldN, Index);
- return true;
-}
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(
+ SubVec, VectorType::get(SV->getType()->getVectorElementType(),
+ VecTy->getVectorNumElements()));
-/// \brief Get a mask consisting of sequential integers starting from \p Start.
-///
-/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumElts) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumElts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ SubVecs[SV].push_back(SubVec);
+ }
+ }
- return ConstantVector::get(Mask);
+ // Replace uses of the shufflevector instructions with the sub-vectors
+ // returned by the load intrinsic. If a shufflevector instruction is
+ // associated with more than one sub-vector, those sub-vectors will be
+ // concatenated into a single wide vector.
+ for (ShuffleVectorInst *SVI : Shuffles) {
+ auto &SubVec = SubVecs[SVI];
+ auto *WideVec =
+ SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+ SVI->replaceAllUsesWith(WideVec);
+ }
+
+ return true;
}
/// \brief Lower an interleaved store into a vstN intrinsic.
@@ -13279,15 +13858,15 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vstN doesn't support i64/f64 elements).
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
- EltIs64Bits)
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // the vector types are divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
@@ -13306,44 +13885,75 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, LaneLen);
}
+ // The base address of the store.
+ Value *BaseAddr = SI->getPointerOperand();
+
+ if (NumStores > 1) {
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+ // We will compute the pointer operand of each store from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace()));
+ }
+
+ assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
+
+ auto Mask = SVI->getShuffleMask();
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+ Type *Tys[] = {Int8Ptr, SubVecTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
- SmallVector<Value *, 6> Ops;
- Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
- Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
- Type *Tys[] = { Int8Ptr, SubVecTy };
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], Tys);
+ // If we generating more than one store, we compute the base address of
+ // subsequent stores as an offset from the previous.
+ if (StoreCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
- // Split the shufflevector operands into sub vectors for the new vstN call.
- auto Mask = SVI->getShuffleMask();
- for (unsigned i = 0; i < Factor; i++) {
- if (Mask[i] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
- } else {
- unsigned StartMask = 0;
- for (unsigned j = 1; j < LaneLen; j++) {
- if (Mask[j*Factor + i] >= 0) {
- StartMask = Mask[j*Factor + i] - j;
- break;
+ SmallVector<Value *, 6> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+
+ Function *VstNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ // Split the shufflevector operands into sub vectors for the new vstN call.
+ for (unsigned i = 0; i < Factor; i++) {
+ unsigned IdxI = StoreCount * LaneLen * Factor + i;
+ if (Mask[IdxI] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+ if (Mask[IdxJ * Factor + IdxI] >= 0) {
+ StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+ break;
+ }
}
+ // Note: If all elements in a chunk are undefs, StartMask=0!
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // In the case of all undefs we're defaulting to using elems from 0
+ // Note: StartMask cannot be negative, it's checked in
+ // isReInterleaveMask
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
- // Note: If all elements in a chunk are undefs, StartMask=0!
- // Note: Filling undef gaps with random elements is ok, since
- // those elements were being written anyway (with undefs).
- // In the case of all undefs we're defaulting to using elems from 0
- // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
}
- }
- Ops.push_back(Builder.getInt32(SI->getAlignment()));
- Builder.CreateCall(VstNFunc, Ops);
+ Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Builder.CreateCall(VstNFunc, Ops);
+ }
return true;
}
@@ -13484,3 +14094,8 @@ void ARMTargetLowering::insertCopiesSplitCSR(
.addReg(NewVR);
}
}
+
+void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
+ MF.getFrameInfo().computeMaxCallFrameSize(MF);
+ TargetLoweringBase::finalizeLowering(MF);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 84c6eb8..f05b142 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -175,9 +175,19 @@ class InstrItineraryData;
VMULLs, // ...signed
VMULLu, // ...unsigned
+ SMULWB, // Signed multiply word by half word, bottom
+ SMULWT, // Signed multiply word by half word, top
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
+ SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
+ SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
+ SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
+ SMLALTT, // 64-bit signed accumulate multiply top, top 16
+ SMLALD, // Signed multiply accumulate long dual
+ SMLALDX, // Signed multiply accumulate long dual exchange
+ SMLSLD, // Signed multiply subtract long dual
+ SMLSLDX, // Signed multiply subtract long dual exchange
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
@@ -344,8 +354,8 @@ class InstrItineraryData;
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
- void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne,
+ void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -473,10 +483,10 @@ class InstrItineraryData;
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
- Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
- bool IsStore, bool IsLoad) const override;
- Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
- bool IsStore, bool IsLoad) const override;
+ Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
@@ -500,9 +510,19 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const override {
+ // Do not merge to larger than i32.
+ return (MemVT.getSizeInBits() <= 32);
+ }
+
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool supportSwiftError() const override {
return true;
}
@@ -514,6 +534,19 @@ class InstrItineraryData;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
+ /// Returns true if \p VecTy is a legal interleaved access type. This
+ /// function checks the vector element type and the overall width of the
+ /// vector.
+ bool isLegalInterleavedAccessType(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ /// Returns the number of interleaved accesses that will be generated when
+ /// lowering accesses of the given type.
+ unsigned getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ void finalizeLowering(MachineFunction &MF) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -569,6 +602,8 @@ class InstrItineraryData;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
@@ -690,7 +725,7 @@ class InstrItineraryData;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
@@ -698,7 +733,7 @@ class InstrItineraryData;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl) const;
+ const SDLoc &dl, bool InvalidOnQNaN) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 488439f..1bbe7f0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -184,7 +184,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
// ARM special operands for disassembly only.
//
-def SetEndAsmOperand : ImmAsmOperand {
+def SetEndAsmOperand : ImmAsmOperand<0,1> {
let Name = "SetEndImm";
let ParserMethod = "parseSetEndImm";
}
@@ -221,25 +221,25 @@ def banked_reg : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
-def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
+def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
let ParserMatchClass = shr_imm8_asm_operand;
}
-def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
+def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
let ParserMatchClass = shr_imm16_asm_operand;
}
-def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
+def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
let ParserMatchClass = shr_imm32_asm_operand;
}
-def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
+def shr_imm64_asm_operand : ImmAsmOperand<1,64> { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
@@ -261,10 +261,19 @@ def const_pool_asm_imm : Operand<i32> {
// Note: When EmitPriority == 1, the alias will be used for printing
class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>;
+class ARMInstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsARM,UseNegativeImmediates]>;
class tInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>;
+class tInstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsThumb,UseNegativeImmediates]>;
class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>;
+class t2InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsThumb2,UseNegativeImmediates]>;
class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>;
class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0>
@@ -948,7 +957,7 @@ class ADivA1I<bits<3> opcod, dag oops, dag iops,
}
// PKH instructions
-def PKHLSLAsmOperand : ImmAsmOperand {
+def PKHLSLAsmOperand : ImmAsmOperand<0,31> {
let Name = "PKHLSLImm";
let ParserMethod = "parsePKHLSLImm";
}
@@ -1013,9 +1022,6 @@ class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
}
-class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack];
-}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 27b6432..a0e2ac4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -32,8 +32,8 @@ using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI() {}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoop(MCInst &NopInst) const {
if (hasNOP()) {
NopInst.setOpcode(ARM::HINT);
NopInst.addOperand(MCOperand::createImm(0));
@@ -129,8 +129,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
- MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .add(predOps(ARMCC::AL));
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index 4b1b709..c87fb97 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -25,8 +25,8 @@ class ARMInstrInfo : public ARMBaseInstrInfo {
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
- /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ /// Return the noop instruction to use for a noop.
+ void getNoop(MCInst &NopInst) const override;
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index c473939..7206083 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -16,7 +16,8 @@
//
// Type profiles.
-def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
def SDT_ARMStructByVal : SDTypeProfile<0, 4,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>,
@@ -51,6 +52,8 @@ def SDT_ARMAnd : SDTypeProfile<1, 2,
SDTCisVT<2, i32>]>;
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
@@ -90,6 +93,18 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
+def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameAs<0, 4>,
+ SDTCisSameAs<0, 5>]>;
+
+def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
+def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
+def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
+def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
@@ -181,6 +196,13 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
SDNPMayStore, SDNPMayLoad]>;
+def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
+def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
+def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
+def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
+def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
+def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -243,13 +265,10 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+ AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
-def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
- AssemblerPredicate<"FeatureT2XtPk",
- "pack/extract">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP", "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
@@ -298,9 +317,16 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
+def UseNegativeImmediates :
+ Predicate<"false">,
+ AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
+let RecomputePerFunction = 1 in {
+ def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
+ def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
+}
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
@@ -327,8 +353,10 @@ def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
"Subtarget->useNEONForSinglePrecisionFP()">;
-def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
-def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
+let RecomputePerFunction = 1 in {
+ def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
+ def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
+}
def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
@@ -423,7 +451,16 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
//
// Immediate operands with a shared generic asm render method.
-class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+class ImmAsmOperand<int Low, int High> : AsmOperandClass {
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+ let DiagnosticType = "ImmRange" # Low # "_" # High;
+}
+
+class ImmAsmOperandMinusOne<int Low, int High> : AsmOperandClass {
+ let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+ let DiagnosticType = "ImmRange" # Low # "_" # High;
+}
// Operands that are part of a memory addressing mode.
class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
@@ -645,35 +682,45 @@ def arm_i32imm : PatLeaf<(imm), [{
}]>;
/// imm0_1 predicate - Immediate in the range [0,1].
-def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
/// imm0_3 predicate - Immediate in the range [0,3].
-def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
/// imm0_7 predicate - Immediate in the range [0,7].
-def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
+def Imm0_7AsmOperand: ImmAsmOperand<0,7> {
+ let Name = "Imm0_7";
+}
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
}]> {
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8_255 predicate - Immediate in the range [8,255].
+def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; }
+def imm8_255 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]> {
+ let ParserMatchClass = Imm8_255AsmOperand;
+}
+
/// imm8 predicate - Immediate is exactly 8.
-def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; }
def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
let ParserMatchClass = Imm8AsmOperand;
}
/// imm16 predicate - Immediate is exactly 16.
-def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; }
def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
let ParserMatchClass = Imm16AsmOperand;
}
/// imm32 predicate - Immediate is exactly 32.
-def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; }
def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
let ParserMatchClass = Imm32AsmOperand;
}
@@ -681,25 +728,25 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>;
/// imm1_7 predicate - Immediate in the range [1,7].
-def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; }
def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
let ParserMatchClass = Imm1_7AsmOperand;
}
/// imm1_15 predicate - Immediate in the range [1,15].
-def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; }
def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
let ParserMatchClass = Imm1_15AsmOperand;
}
/// imm1_31 predicate - Immediate in the range [1,31].
-def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; }
def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
let ParserMatchClass = Imm1_31AsmOperand;
}
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: ImmAsmOperand {
+def Imm0_15AsmOperand: ImmAsmOperand<0,15> {
let Name = "Imm0_15";
let DiagnosticType = "ImmRange0_15";
}
@@ -710,7 +757,7 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
+def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
}]> {
@@ -718,15 +765,15 @@ def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
-def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; }
def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 32;
+ return Imm >= 0 && Imm < 33;
}]> {
let ParserMatchClass = Imm0_32AsmOperand;
}
/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
-def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; }
def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 64;
}]> {
@@ -734,7 +781,7 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_239 predicate - Immediate in the range [0,239].
-def Imm0_239AsmOperand : ImmAsmOperand {
+def Imm0_239AsmOperand : ImmAsmOperand<0,239> {
let Name = "Imm0_239";
let DiagnosticType = "ImmRange0_239";
}
@@ -743,13 +790,13 @@ def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
}
/// imm0_255 predicate - Immediate in the range [0,255].
-def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
+def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
-/// imm0_65535 - An immediate is in the range [0.65535].
-def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+/// imm0_65535 - An immediate is in the range [0,65535].
+def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; }
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 65536;
}]> {
@@ -767,19 +814,23 @@ def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
// FIXME: This really needs a Thumb version separate from the ARM version.
// While the range is the same, and can thus use the same match class,
// the encoding is different so it should have a different encoder method.
-def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
+def Imm0_65535ExprAsmOperand: AsmOperandClass {
+ let Name = "Imm0_65535Expr";
+ let RenderMethod = "addImmOperands";
+}
+
def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
-def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; }
def imm256_65535_expr : Operand<i32> {
let ParserMatchClass = Imm256_65535ExprAsmOperand;
}
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
-def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
+def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 0xffffff;
}]> {
@@ -808,7 +859,9 @@ def imm1_32_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
MVT::i32);
}]>;
-def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> {
+ let Name = "Imm1_32";
+}
def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
@@ -822,8 +875,10 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
MVT::i32);
}]>;
-def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
-def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; }
+def imm1_16 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 16;
+ }],
imm1_16_XFORM> {
let PrintMethod = "printImmPlusOneOperand";
let ParserMatchClass = Imm1_16AsmOperand;
@@ -1914,8 +1969,8 @@ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
- [(ARMcallseq_start timm:$amt)]>;
+PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt, timm:$amt2)]>;
}
def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
@@ -1936,7 +1991,9 @@ def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
- "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
+ "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -3425,8 +3482,12 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot),
(SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src),
+ (SXTB16 GPR:$Src, 0)>;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS),
+ (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
// Zero extenders
@@ -3446,6 +3507,8 @@ def UXTB16 : AI_ext_rrot<0b01101100,
// (UXTB16r_rot GPR:$Src, 3)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16 GPR:$Src, 1)>;
+def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src),
+ (UXTB16 GPR:$Src, 0)>;
def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -3460,6 +3523,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS),
+ (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
def SBFX : I<(outs GPRnopc:$Rd),
@@ -3586,71 +3651,85 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Unpredictable{11-8} = 0b1111;
}
-// Saturating add/subtract
+// Wrappers around the AAI class
+class AAIRevOpr<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = []>
+ : AAI<op27_20, op11_4, opc,
+ pattern,
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+class AAIIntrinsic<bits<8> op27_20, bits<8> op11_4, string opc,
+ Intrinsic intrinsic>
+ : AAI<op27_20, op11_4, opc,
+ [(set GPRnopc:$Rd, (intrinsic GPRnopc:$Rn, GPRnopc:$Rm))]>;
+// Saturating add/subtract
+let hasSideEffects = 1 in {
+def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>;
+def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>;
+def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>;
+def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>;
+
+def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm,
+ GPRnopc:$Rm),
+ GPRnopc:$Rn))]>;
+def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm,
+ (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>;
+def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>;
let DecoderMethod = "DecodeQADDInstruction" in
-def QADD : AAI<0b00010000, 0b00000101, "qadd",
- [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
- (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
-
-def QSUB : AAI<0b00010010, 0b00000101, "qsub",
- [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
- (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
-def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
- (ins GPRnopc:$Rm, GPRnopc:$Rn),
- "\t$Rd, $Rm, $Rn">;
-def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
- (ins GPRnopc:$Rm, GPRnopc:$Rn),
- "\t$Rd, $Rm, $Rn">;
-
-def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
-def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
-def QASX : AAI<0b01100010, 0b11110011, "qasx">;
-def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
-def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
-def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
-def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
-def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
-def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
-def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
-def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
-def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+ def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>;
+}
+
+def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>;
+def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>;
+def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>;
+def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>;
+def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>;
+def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>;
+def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>;
+def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>;
// Signed/Unsigned add/subtract
-def SASX : AAI<0b01100001, 0b11110011, "sasx">;
-def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
-def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
-def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
-def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
-def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
-def UASX : AAI<0b01100101, 0b11110011, "uasx">;
-def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
-def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
-def USAX : AAI<0b01100101, 0b11110101, "usax">;
-def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
-def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>;
+def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>;
+def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>;
+def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>;
+def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>;
+def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>;
+def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>;
+def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>;
+def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>;
+def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>;
+def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>;
+def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>;
// Signed/Unsigned halving add/subtract
-def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
-def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
-def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
-def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
-def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
-def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
-def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
-def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
-def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
-def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
-def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
-def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>;
+def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>;
+def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>;
+def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>;
+def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>;
+def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>;
+def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>;
+def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>;
+def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>;
+def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>;
+def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>;
+def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>;
// Unsigned Sum of Absolute Differences [and Accumulate].
def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
- "\t$Rd, $Rn, $Rm", []>,
+ "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -3664,7 +3743,8 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
}
def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
MulFrm /* for convenience */, NoItinerary, "usada8",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
+ "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>,
Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{
bits<4> Rd;
bits<4> Rn;
@@ -3679,7 +3759,6 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
}
// Signed/Unsigned saturate
-
def SSAT : AI<(outs GPRnopc:$Rd),
(ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
@@ -3748,6 +3827,10 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
(USAT imm0_31:$pos, GPRnopc:$a, 0)>;
def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
(SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
+def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos),
+ (SSAT16 imm1_16:$pos, GPRnopc:$a)>;
+def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos),
+ (USAT16 imm0_15:$pos, GPRnopc:$a)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -3850,6 +3933,7 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm,
let Inst{11-0} = imm;
}
+let AddedComplexity = 1 in
def : ARMPat<(and GPR:$src, mod_imm_not:$imm),
(BICri GPR:$src, mod_imm_not:$imm)>;
@@ -3899,7 +3983,8 @@ def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b0000;
let Unpredictable{15-12} = 0b1111;
}
@@ -3910,14 +3995,16 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6, UseMulOps]>;
+ Requires<[IsARM, NoV6, UseMulOps]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>,
- Requires<[IsARM, HasV6, UseMulOps]> {
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3928,12 +4015,14 @@ def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd),
pred:$p, cc_out:$s), 4, IIC_iMAC32,
[(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))],
(MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2, UseMulOps]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3949,26 +4038,38 @@ let hasSideEffects = 0 in {
let isCommutable = 1 in {
def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
- "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (smullohi GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
- "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (umullohi GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>;
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- 4, IIC_iMUL64, [],
+ 4, IIC_iMUL64,
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (smullohi GPR:$Rn, GPR:$Rm))],
(SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- 4, IIC_iMUL64, [],
+ 4, IIC_iMUL64,
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (umullohi GPR:$Rn, GPR:$Rm))],
(UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
}
}
@@ -3976,17 +4077,20 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;
@@ -4004,13 +4108,15 @@ def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
4, IIC_iMAC64, [],
(SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
(UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
}
} // hasSideEffects
@@ -4019,13 +4125,15 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b1111;
}
def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b1111;
}
@@ -4033,57 +4141,67 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6, UseMulOps]>;
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6, UseMulOps]>;
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sext_inreg GPR:$Rm, i16)))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sra GPR:$Rm, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sext_inreg GPR:$Rm, i16)))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sra GPR:$Rm, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsARM, HasV5TE]>;
+ [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsARM, HasV5TE]>;
+ [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
}
@@ -4095,7 +4213,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd, (add GPR:$Ra,
(mul (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4103,7 +4222,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4111,7 +4231,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4119,19 +4240,24 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
}
}
@@ -4139,30 +4265,34 @@ defm SMUL : AI_smul<"smul">;
defm SMLA : AI_smla<"smla">;
// Halfword multiply accumulate long: SMLAL<x><y>.
-def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
+class SMLAL<bits<2> opc1, string asm>
+ : AMulxyI64<0b0001010, opc1,
+ (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
+
+def SMLALBB : SMLAL<0b00, "smlalbb">;
+def SMLALBT : SMLAL<0b10, "smlalbt">;
+def SMLALTB : SMLAL<0b01, "smlaltb">;
+def SMLALTT : SMLAL<0b11, "smlaltt">;
+
+def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALBB $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALBT $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALTB $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALTT $Rn, $Rm, $RLo, $RHi)>;
// Helper class for AI_smld.
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
InstrItinClass itin, string opc, string asm>
- : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>,
+ Requires<[IsARM, HasV6]> {
bits<4> Rn;
bits<4> Rm;
let Inst{27-23} = 0b01110;
@@ -4203,48 +4333,85 @@ multiclass AI_smld<bit sub, string opc> {
def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
- !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
- !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
-
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
}
defm SMLA : AI_smld<0, "smla">;
defm SMLS : AI_smld<1, "smls">;
+def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+
multiclass AI_sdml<bit sub, string opc> {
def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
}
defm SMUA : AI_sdml<0, "smua">;
defm SMUS : AI_sdml<1, "smus">;
+def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>;
+def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>;
+def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>;
+def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>;
+
//===----------------------------------------------------------------------===//
// Division Instructions (ARMv7-A with virtualization extension)
//
def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasDivideInARM]>;
+ Requires<[IsARM, HasDivideInARM]>,
+ Sched<[WriteDIV]>;
def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasDivideInARM]>;
+ Requires<[IsARM, HasDivideInARM]>,
+ Sched<[WriteDIV]>;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -4831,14 +4998,15 @@ let AddedComplexity = 8 in {
def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
}
-// SWP/SWPB are deprecated in V6/V7.
+// SWP/SWPB are deprecated in V6/V7 and optional in v7VE.
+// FIXME Use InstAlias to generate LDREX/STREX pairs instead.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
(ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def SWPB: AIswp<1, (outs GPRnopc:$Rt),
(ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
}
//===----------------------------------------------------------------------===//
@@ -4850,7 +5018,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
bits<4> opc1;
bits<4> CRn;
bits<4> CRd;
@@ -4872,7 +5040,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
bits<4> CRn;
@@ -5048,13 +5216,13 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
-defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -5132,7 +5300,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -5140,7 +5308,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -5183,7 +5351,7 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern = []>
: ABXI<0b1100, oops, iops, NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -5525,20 +5693,52 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>;
+ (SMULBB GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
+ (SMULBT GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>;
+ (SMULTB GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+
+def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b),
+ (SMULTT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b),
+ (SMULWT GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc),
+ (SMLATT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc),
+ (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>;
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
@@ -5717,33 +5917,49 @@ def : MnemonicAlias<"usubaddx", "usax">;
// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
-def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+def : ARMInstSubst<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+def : ARMInstSubst<"mvn${s}${p} $Rd, $imm",
(MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
-def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm",
(ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+def : ARMInstSubst<"bic${s}${p} $Rdn, $imm",
(ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm",
(BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+def : ARMInstSubst<"and${s}${p} $Rdn, $imm",
(BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, mod_imm_neg" -> sub
-def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm",
(SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+def : ARMInstSubst<"add${s}${p} $Rd, $imm",
(SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Likewise, "sub Rd, mod_imm_neg" -> add
+def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm",
+ (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sub${s}${p} $Rd, $imm",
+ (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+
+
+def : ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm",
+ (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"adc${s}${p} $Rdn, $imm",
+ (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm",
+ (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm",
+ (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+
// Same for CMP <--> CMN via mod_imm_neg
-def : ARMInstAlias<"cmp${p} $Rd, $imm",
+def : ARMInstSubst<"cmp${p} $Rd, $imm",
(CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
-def : ARMInstAlias<"cmn${p} $Rd, $imm",
+def : ARMInstSubst<"cmn${p} $Rd, $imm",
(CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
@@ -5837,21 +6053,28 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
// significantly more naive than the standard expansion: we conservatively
// assume seq_cst, strong cmpxchg and omit clrex on failure.
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
mayLoad = 1, mayStore = 1 in {
-def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
(ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
NoItinerary, []>, Sched<[]>;
}
+
+def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
+ [(atomic_fence imm:$ordering, 0)]> {
+ let hasSideEffects = 1;
+ let Size = 0;
+ let AsmString = "@ COMPILER BARRIER";
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index b5fa8e9..858136a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>]>;
+def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
+def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
+def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
+
+
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -666,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -674,7 +682,7 @@ class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x2,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -695,7 +703,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -703,7 +711,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -712,7 +720,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -720,7 +728,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -739,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
- "$Vd, $Rn", "", []> {
+ "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -748,7 +756,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -756,7 +764,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -772,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
- "$Vd, $Rn", "", []> {
+ "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -789,7 +797,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -797,7 +805,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -813,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -829,22 +837,22 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
-def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
-def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
-def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -867,45 +875,45 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
}
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
-def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
// ...with double-spaced registers
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$Rn), IIC_VLD3,
- "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
@@ -915,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
-def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
-def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -925,7 +933,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -934,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
-def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
-def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// ...with double-spaced registers:
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
@@ -946,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
-def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
-def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$Rn), IIC_VLD4,
- "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
+ Sched<[WriteVLD4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
@@ -974,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
-def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
-def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -984,7 +993,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -993,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
-def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
-def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
// ...with double-spaced registers:
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
@@ -1005,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
-def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
-def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1068,11 +1077,12 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
"$src = $Vd",
[(set DPR:$Vd, (vector_insert (Ty DPR:$src),
(i32 (LoadOp addrmode6oneL32:$Rn)),
- imm:$lane))]> {
+ imm:$lane))]>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD1LN";
}
-class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
+ Sched<[WriteVLD1]> {
let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
(i32 (LoadOp addrmode6:$addr)),
imm:$lane))];
@@ -1109,7 +1119,7 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$src = $Vd, $Rn.addr = $wb", []> {
+ "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let DecoderMethod = "DecodeVLD1LN";
}
@@ -1126,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
let Inst{4} = Rn{4};
}
-def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2", []> {
+ "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2LN";
@@ -1151,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
@@ -1163,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
-def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1187,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1198,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1207,7 +1217,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD3LN";
}
@@ -1222,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
@@ -1234,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1246,7 +1256,7 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VLD3lnu, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
- []> {
+ []>, Sched<[WriteVLD2]> {
let DecoderMethod = "DecodeVLD3LN";
}
@@ -1260,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1271,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1281,7 +1291,8 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
@@ -1298,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
@@ -1311,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1339,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1351,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1363,7 +1374,8 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
(ins AddrMode:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1426,7 +1438,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1483,7 +1495,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
(outs VdTy:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
@@ -1492,7 +1504,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
(outs VdTy:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
@@ -1516,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
class VLD3DUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6dup:$Rn), IIC_VLD3dup,
- "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
@@ -1526,9 +1539,9 @@ def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
-def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
@@ -1540,7 +1553,7 @@ class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
@@ -1553,9 +1566,9 @@ def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
-def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
@@ -1572,9 +1585,9 @@ def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
@@ -1587,7 +1600,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt>
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4DupInstruction";
}
@@ -1600,9 +1613,9 @@ def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1649,14 +1662,14 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
- IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
- IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1677,7 +1690,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1686,7 +1699,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1695,7 +1708,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1704,7 +1717,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1724,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins AddrMode:$Rn, VecListThreeD:$Vd),
- IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1733,7 +1746,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1742,7 +1755,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1758,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
-def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
(ins AddrMode:$Rn, VecListFourD:$Vd),
IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
- []> {
+ []>, Sched<[WriteVST4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1776,7 +1789,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1785,7 +1798,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1801,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
-def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1816,22 +1829,22 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
-def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -1839,7 +1852,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1847,7 +1860,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
@@ -1856,7 +1869,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1865,7 +1878,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
@@ -1882,12 +1895,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
-def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
// ...with double-spaced registers
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
@@ -1907,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
- "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
@@ -1917,9 +1930,9 @@ def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1927,7 +1940,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -1936,9 +1949,9 @@ def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// ...with double-spaced registers:
def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
@@ -1948,25 +1961,25 @@ def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
-def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// ...alternate versions to be allocated odd register numbers:
-def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
-def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
@@ -1976,9 +1989,9 @@ def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1986,7 +1999,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -1995,9 +2008,9 @@ def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
// ...with double-spaced registers:
def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
@@ -2007,18 +2020,18 @@ def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
-def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
// ...alternate versions to be allocated odd register numbers:
-def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
-def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
@@ -2052,12 +2065,13 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
+ Sched<[WriteVST1]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
- : VSTQLNPseudo<IIC_VST1ln> {
+ : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr)];
}
@@ -2096,11 +2110,12 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- AdrMode:$Rn, am6offset:$Rm))]> {
+ AdrMode:$Rn, am6offset:$Rm))]>,
+ Sched<[WriteVST1]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
- : VSTQLNWBPseudo<IIC_VST1lnu> {
+ : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr, am6offset:$offset))];
}
@@ -2131,7 +2146,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
@@ -2147,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
@@ -2161,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
let Inst{4} = Rn{4};
}
-def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
-def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2185,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2196,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
- "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
+ Sched<[WriteVST2]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST3LN";
}
@@ -2219,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
@@ -2255,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2266,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2275,7 +2291,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST4LN";
@@ -2292,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
@@ -2305,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
-def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2331,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2343,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
@@ -5550,8 +5566,7 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
- IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
- int_arm_neon_vabs>;
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v2f32, v2f32, fabs>;
@@ -5567,29 +5582,6 @@ def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
v8f16, v8f16, fabs>,
Requires<[HasNEON, HasFullFP16]>;
-def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
- (v2i32 (bitconvert (v8i8 (add DPR:$src,
- (NEONvshrs DPR:$src, (i32 7))))))),
- (VABSv8i8 DPR:$src)>;
-def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
- (v2i32 (bitconvert (v4i16 (add DPR:$src,
- (NEONvshrs DPR:$src, (i32 15))))))),
- (VABSv4i16 DPR:$src)>;
-def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
- (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
- (VABSv2i32 DPR:$src)>;
-def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
- (v4i32 (bitconvert (v16i8 (add QPR:$src,
- (NEONvshrs QPR:$src, (i32 7))))))),
- (VABSv16i8 QPR:$src)>;
-def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
- (v4i32 (bitconvert (v8i16 (add QPR:$src,
- (NEONvshrs QPR:$src, (i32 15))))))),
- (VABSv8i16 QPR:$src)>;
-def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
- (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
- (VABSv4i32 QPR:$src)>;
-
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
@@ -6443,7 +6435,8 @@ def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$Vd, $Vn, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+ [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
@@ -6498,6 +6491,49 @@ def VTBX4Pseudo
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
+ (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vm)),
+ (v8i8 (VTBX2 v8i8:$orig,
+ (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBX3Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBX4Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
@@ -7139,6 +7175,17 @@ let Predicates = [IsBE] in {
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}
+def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+
//===----------------------------------------------------------------------===//
// Assembler aliases
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index a681f64..891a8f4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -19,7 +19,7 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
}]>;
-def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; }
def imm_sr : Operand<i32>, PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
@@ -28,22 +28,31 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = ThumbSRImmAsmOperand;
}
-def imm_comp_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
- MVT::i32);
-}]>;
-
def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
+def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; }
+def mod_imm1_7_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return 0 < Value && Value < 8;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ThumbModImmNeg1_7AsmOperand;
+}
+
+def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; }
+def mod_imm8_255_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return 7 < Value && Value < 256;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ThumbModImmNeg8_255AsmOperand;
+}
+
+
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
-def imm8_255 : ImmLeaf<i32, [{
- return Imm >= 8 && Imm < 256;
-}]>;
def imm8_255_neg : PatLeaf<(i32 imm), [{
unsigned Val = -N->getZExtValue();
return Val >= 8 && Val < 256;
@@ -275,8 +284,8 @@ def tADJCALLSTACKUP :
Requires<[IsThumb, IsThumb1Only]>;
def tADJCALLSTACKDOWN :
- PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
- [(ARMcallseq_start imm:$amt)]>,
+ PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_start imm:$amt, imm:$amt2)]>,
Requires<[IsThumb, IsThumb1Only]>;
}
@@ -407,9 +416,9 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
let DecoderMethod = "DecodeThumbAddSPImm";
}
-def : tInstAlias<"add${p} sp, $imm",
+def : tInstSubst<"add${p} sp, $imm",
(tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
-def : tInstAlias<"add${p} sp, sp, $imm",
+def : tInstSubst<"add${p} sp, sp, $imm",
(tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
// Can optionally specify SP as a three operand instruction.
@@ -910,7 +919,7 @@ let isAdd = 1 in {
def tADC : // A8.6.2
T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
"adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ []>, Sched<[WriteALU]>;
// Add immediate
def tADDi3 : // A8.6.4 T1
@@ -938,6 +947,43 @@ let isAdd = 1 in {
"add", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ /// Similar to the above except these set the 's' bit so the
+ /// instruction modifies the CPSR register.
+ ///
+ /// These opcodes will be converted to the real non-S opcodes by
+ /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+ let hasPostISelHook = 1, Defs = [CPSR] in {
+ let isCommutable = 1, Uses = [CPSR] in
+ def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm,
+ CPSR))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tADDSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rm,
+ imm0_7:$imm3))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn,
+ imm8_255:$imm8))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ let isCommutable = 1 in
+ def tADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rn,
+ tGPR:$Rm))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+ }
+
let hasSideEffects = 0 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
@@ -951,6 +997,12 @@ let isAdd = 1 in {
}
}
+def : tInstSubst<"sub${s}${p} $rd, $rn, $imm",
+ (tADDi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+def : tInstSubst<"sub${s}${p} $rdn, $imm",
+ (tADDi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
+
// AND register
let isCommutable = 1 in
def tAND : // A8.6.12
@@ -1197,7 +1249,7 @@ def tSBC : // A8.6.151
T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sbc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+ []>,
Sched<[WriteALU]>;
// Subtract immediate
@@ -1218,6 +1270,14 @@ def tSUBi8 : // A8.6.210 T2
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
Sched<[WriteALU]>;
+def : tInstSubst<"add${s}${p} $rd, $rn, $imm",
+ (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+
+
+def : tInstSubst<"add${s}${p} $rdn, $imm",
+ (tSUBi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
+
// Subtract register
def tSUBrr : // A8.6.212
T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1226,6 +1286,42 @@ def tSUBrr : // A8.6.212
[(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
Sched<[WriteALU]>;
+/// Similar to the above except these set the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+ let Uses = [CPSR] in
+ def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm,
+ CPSR))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rm,
+ imm0_7:$imm3))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rdn, CPSR, (ARMsubc tGPR:$Rn,
+ imm8_255:$imm8))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rn,
+ tGPR:$Rm))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+}
+
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1317,14 +1413,15 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
// and make use of the same compressed jump table format as Thumb-2.
-let Size = 2 in {
+let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
+ isIndirectBranch = 1 in {
def tTBB_JT : tPseudoInst<(outs),
- (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
- Sched<[WriteBr]>;
+ (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+ IIC_Br, []>, Sched<[WriteBr]>;
def tTBH_JT : tPseudoInst<(outs),
- (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
- Sched<[WriteBr]>;
+ (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+ IIC_Br, []>, Sched<[WriteBr]>;
}
//===----------------------------------------------------------------------===//
@@ -1386,22 +1483,6 @@ def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
(tCMPr tGPR:$Rn, tGPR:$Rm)>;
-// Add with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
- (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
- (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
- (tADDrr tGPR:$lhs, tGPR:$rhs)>;
-
-// Subtract with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
- (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
- (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
-def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
- (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
-
// Bswap 16 with load/store
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
@@ -1477,7 +1558,7 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// post-inc LDR -> LDM r0!, {r1}. The way operands are layed out in LDMs is
// different to how ISel expects them for a post-inc load, so use a pseudo
// and expand it just after ISel.
-let usesCustomInserter = 1,
+let usesCustomInserter = 1, mayLoad =1,
Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
(ins rGPR:$Rn, pred:$p),
@@ -1547,7 +1628,7 @@ def : T1Pat<(i32 thumb_immshifted:$src),
(thumb_immshifted_shamt imm:$src))>;
def : T1Pat<(i32 imm0_255_comp:$src),
- (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+ (tMVN (tMOVi8 (imm_not_XFORM imm:$src)))>;
def : T1Pat<(i32 imm256_510:$src),
(tADDi8 (tMOVi8 255),
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 603d664..42eac12 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -76,7 +76,11 @@ def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : AsmOperandClass {
+ let Name = "T2SOImm";
+ let RenderMethod = "addImmOperands";
+
+}
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
@@ -110,15 +114,14 @@ def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
-def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- int64_t Value = -(int)N->getZExtValue();
- return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+def t2_so_imm_neg : Operand<i32>, ImmLeaf<i32, [{
+ return Imm && ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
}], t2_so_imm_neg_XFORM> {
let ParserMatchClass = t2_so_imm_neg_asmoperand;
}
-/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095_asmoperand: ImmAsmOperand<0,4095> { let Name = "Imm0_4095"; }
def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
}]> {
@@ -139,7 +142,7 @@ def imm1_255_neg : PatLeaf<(i32 imm), [{
def imm0_255_not : PatLeaf<(i32 imm), [{
return (uint32_t)(~N->getZExtValue()) < 255;
-}], imm_comp_XFORM>;
+}], imm_not_XFORM>;
def lo5AllOne : PatLeaf<(i32 imm), [{
// Returns true if all low 5-bits are 1.
@@ -538,7 +541,8 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin,
class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
string opc, list<dag> pattern>
: T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> {
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -556,7 +560,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc>
: T2I<(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -977,7 +982,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
PatFrag opnode> {
def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]>,
+ Sched<[WriteLd]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-25} = 0b1111100;
@@ -993,7 +999,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
+ Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1015,7 +1022,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]>,
+ Sched<[WriteLd]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -1039,7 +1047,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
// from the PC.
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]>,
+ Sched<[WriteLd]> {
let isReMaterializable = 1;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1065,7 +1074,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
PatFrag opnode> {
def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0001;
let Inst{22-21} = opcod;
@@ -1082,7 +1092,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
}
def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -1102,7 +1113,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
}
def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -1121,28 +1133,10 @@ multiclass T2I_st<bits<2> opcod, string opc,
/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm$rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
-}
-
-// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
-class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+class T2I_ext_rrot_base<bits<3> opcod, dag iops, dag oops,
+ string opc, string oprs,
+ list<dag> pattern>
+ : T2TwoReg<iops, oops, IIC_iEXTr, opc, oprs, pattern> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1150,46 +1144,34 @@ class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
let Inst{19-16} = 0b1111; // Rn
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
-
-// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
-// supported yet.
-class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, "\t$Rd, $Rm$rot", []>,
- Requires<[IsThumb2, HasT2ExtractPack]> {
- bits<2> rot;
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
+ let Inst{5-4} = rot; // rotate
+}
+
+class T2I_ext_rrot<bits<3> opcod, string opc>
+ : T2I_ext_rrot_base<opcod,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rm, rot_imm:$rot),
+ opc, ".w\t$Rd, $Rm$rot", []>,
+ Requires<[IsThumb2]>,
+ Sched<[WriteALU, ReadALU]>;
+
+// UXTB16, SXTB16 - Requires HasDSP, does not need the .w qualifier.
+class T2I_ext_rrot_xtb16<bits<3> opcod, string opc>
+ : T2I_ext_rrot_base<opcod,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rm, rot_imm:$rot),
+ opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[HasDSP, IsThumb2]>,
+ Sched<[WriteALU, ReadALU]>;
/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+class T2I_exta_rrot<bits<3> opcod, string opc>
: T2ThreeReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- bits<2> rot;
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
-
-class T2I_exta_rrot_np<bits<3> opcod, string opc>
- : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasDSP, IsThumb2]>,
+ Sched<[WriteALU, ReadALU]> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1279,7 +1261,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+ Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
@@ -1333,17 +1316,20 @@ let mayLoad = 1, hasSideEffects = 0 in {
def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
- "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
- "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
@@ -1353,41 +1339,45 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []>;
+ []>, Sched<[WriteLd]>;
def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []>;
+ []>, Sched<[WriteLd]>;
def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
: T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
- "\t$Rt, $addr", []> {
+ "\t$Rt, $addr", []>, Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1431,11 +1421,14 @@ class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
}
def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>;
@@ -1448,7 +1441,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+ Sched<[WriteST]>;
// Indexed stores
@@ -1457,19 +1451,22 @@ def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"strh", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
} // mayStore = 1, hasSideEffects = 0
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1480,7 +1477,8 @@ def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_store GPRnopc:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, addr_offset_none:$Rn,
@@ -1490,7 +1488,8 @@ def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, addr_offset_none:$Rn,
@@ -1500,7 +1499,8 @@ def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
// put the patterns on the instruction definitions directly as ISel wants
@@ -1513,17 +1513,20 @@ def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
}
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
@@ -1531,7 +1534,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
: T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
- "\t$Rt, $addr", []> {
+ "\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = 0; // not signed
@@ -1557,7 +1560,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
let mayLoad = 1 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>,
+ Sched<[WriteLd]> {
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
@@ -1565,13 +1569,13 @@ let mayLoad = 1 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
- "$addr.base = $wb", []>;
+ "$addr.base = $wb", []>, Sched<[WriteLd]>;
let mayStore = 1 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
- "$addr.base = $wb", []> {
+ "$addr.base = $wb", []>, Sched<[WriteST]> {
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
@@ -1580,12 +1584,13 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
- "$addr.base = $wb", []>;
+ "$addr.base = $wb", []>, Sched<[WriteST]>;
class T2Istrrel<bits<2> bit54, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
- asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> {
+ asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]>,
+ Sched<[WriteST]> {
bits<4> Rt;
bits<4> addr;
@@ -1861,7 +1866,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
//
let hasSideEffects = 0 in
-def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -1870,11 +1875,11 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
-def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, zero_reg)>;
-def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, CPSR)>;
-def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, CPSR)>;
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
@@ -1926,10 +1931,11 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
def : InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>;
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
let Constraints = "$src = $Rd" in {
def t2MOVTi16 : T2I<(outs rGPR:$Rd),
@@ -1969,31 +1975,43 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
// Sign extenders
-def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
- UnOpFrag<(sext_inreg node:$Src, i8)>>;
-def t2SXTH : T2I_ext_rrot<0b000, "sxth",
- UnOpFrag<(sext_inreg node:$Src, i16)>>;
-def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb">;
+def t2SXTH : T2I_ext_rrot<0b000, "sxth">;
+def t2SXTB16 : T2I_ext_rrot_xtb16<0b010, "sxtb16">;
+
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab">;
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah">;
+def t2SXTAB16 : T2I_exta_rrot<0b010, "sxtab16">;
+
+def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i8),
+ (t2SXTB rGPR:$Rn, rot_imm:$rot)>;
+def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i16),
+ (t2SXTH rGPR:$Rn, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn,
+ (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn,
+ (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn),
+ (t2SXTB16 rGPR:$Rn, 0)>;
+def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm),
+ (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
-def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
- BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
- BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// A simple right-shift can also be used in most cases (the exception is the
// SXTH operations with a rotate of 24: there the non-contiguous bits are
// relevant).
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(srl rGPR:$Rm, rot_imm:$rot), i8)),
(t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(srl rGPR:$Rm, imm8_or_16:$rot), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(rotr rGPR:$Rm, (i32 24)), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(or (srl rGPR:$Rm, (i32 24)),
(shl rGPR:$Rm, (i32 8))), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
@@ -2001,12 +2019,19 @@ def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
// Zero extenders
let AddedComplexity = 16 in {
-def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
- UnOpFrag<(and node:$Src, 0x000000FF)>>;
-def t2UXTH : T2I_ext_rrot<0b001, "uxth",
- UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
- UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb">;
+def t2UXTH : T2I_ext_rrot<0b001, "uxth">;
+def t2UXTB16 : T2I_ext_rrot_xtb16<0b011, "uxtb16">;
+
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x000000FF),
+ (t2UXTB rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF),
+ (t2UXTH rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
+ (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>;
+
+def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm),
+ (t2UXTB16 rGPR:$Rm, 0)>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2014,23 +2039,29 @@ def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
// eight bits of the source into the lower eight bits of the result.
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
// (t2UXTB16 rGPR:$Src, 3)>,
-// Requires<[HasT2ExtractPack, IsThumb2]>;
+// Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
(t2UXTB16 rGPR:$Src, 1)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
-def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab">;
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah">;
+def t2UXTAB16 : T2I_exta_rrot<0b011, "uxtab16">;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot),
+ 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot),
+ 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
0xFF)),
(t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm),
+ (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
}
@@ -2060,6 +2091,19 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
}
+def : t2InstSubst<"adc${s}${p} $rd, $rn, $imm",
+ (t2SBCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm",
+ (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+
+def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm",
+ (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"addw${p} $rd, $rn, $imm",
+ (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm",
+ (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"subw${p} $rd, $rn, $imm",
+ (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>;
@@ -2102,10 +2146,9 @@ def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
(t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
-// Select Bytes -- for disassembly only
-
def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>,
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
@@ -2119,9 +2162,7 @@ def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
- list<dag> pat = [/* For disassembly only; pattern left blank */],
- dag iops = (ins rGPR:$Rn, rGPR:$Rm),
- string asm = "\t$Rd, $Rn, $Rm">
+ list<dag> pat, dag iops, string asm>
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
@@ -2139,60 +2180,72 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
let Inst{3-0} = Rm;
}
-// Saturating add/subtract -- for disassembly only
-
-def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
- [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
-def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
-def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
-def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
-def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
- [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
-def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
-def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
-def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
-def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
-def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
-def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
-def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
-
-// Signed/Unsigned add/subtract -- for disassembly only
-
-def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
-def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
-def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
-def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
-def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
-def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
-def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
-def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
-def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
-def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
-def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
-def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
-
-// Signed/Unsigned halving add/subtract -- for disassembly only
-
-def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
-def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
-def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
-def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
-def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
-def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
-def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
-def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
-def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
-def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
-def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
-def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+class T2I_pam_intrinsics<bits<3> op22_20, bits<4> op7_4, string opc,
+ Intrinsic intrinsic>
+ : T2I_pam<op22_20, op7_4, opc,
+ [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rn, rGPR:$Rm), "\t$Rd, $Rn, $Rm">;
+
+class T2I_pam_intrinsics_rev<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2I_pam<op22_20, op7_4, opc, [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+
+// Saturating add/subtract
+def t2QADD16 : T2I_pam_intrinsics<0b001, 0b0001, "qadd16", int_arm_qadd16>;
+def t2QADD8 : T2I_pam_intrinsics<0b000, 0b0001, "qadd8", int_arm_qadd8>;
+def t2QASX : T2I_pam_intrinsics<0b010, 0b0001, "qasx", int_arm_qasx>;
+def t2UQSUB8 : T2I_pam_intrinsics<0b100, 0b0101, "uqsub8", int_arm_uqsub8>;
+def t2QSAX : T2I_pam_intrinsics<0b110, 0b0001, "qsax", int_arm_qsax>;
+def t2QSUB16 : T2I_pam_intrinsics<0b101, 0b0001, "qsub16", int_arm_qsub16>;
+def t2QSUB8 : T2I_pam_intrinsics<0b100, 0b0001, "qsub8", int_arm_qsub8>;
+def t2UQADD16 : T2I_pam_intrinsics<0b001, 0b0101, "uqadd16", int_arm_uqadd16>;
+def t2UQADD8 : T2I_pam_intrinsics<0b000, 0b0101, "uqadd8", int_arm_uqadd8>;
+def t2UQASX : T2I_pam_intrinsics<0b010, 0b0101, "uqasx", int_arm_uqasx>;
+def t2UQSAX : T2I_pam_intrinsics<0b110, 0b0101, "uqsax", int_arm_uqsax>;
+def t2UQSUB16 : T2I_pam_intrinsics<0b101, 0b0101, "uqsub16", int_arm_uqsub16>;
+def t2QADD : T2I_pam_intrinsics_rev<0b000, 0b1000, "qadd">;
+def t2QSUB : T2I_pam_intrinsics_rev<0b000, 0b1010, "qsub">;
+def t2QDADD : T2I_pam_intrinsics_rev<0b000, 0b1001, "qdadd">;
+def t2QDSUB : T2I_pam_intrinsics_rev<0b000, 0b1011, "qdsub">;
+
+def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn),
+ (t2QADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn),
+ (t2QSUB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
+ (t2QDADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
+ (t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
+
+// Signed/Unsigned add/subtract
+
+def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>;
+def t2SADD16 : T2I_pam_intrinsics<0b001, 0b0000, "sadd16", int_arm_sadd16>;
+def t2SADD8 : T2I_pam_intrinsics<0b000, 0b0000, "sadd8", int_arm_sadd8>;
+def t2SSAX : T2I_pam_intrinsics<0b110, 0b0000, "ssax", int_arm_ssax>;
+def t2SSUB16 : T2I_pam_intrinsics<0b101, 0b0000, "ssub16", int_arm_ssub16>;
+def t2SSUB8 : T2I_pam_intrinsics<0b100, 0b0000, "ssub8", int_arm_ssub8>;
+def t2UASX : T2I_pam_intrinsics<0b010, 0b0100, "uasx", int_arm_uasx>;
+def t2UADD16 : T2I_pam_intrinsics<0b001, 0b0100, "uadd16", int_arm_uadd16>;
+def t2UADD8 : T2I_pam_intrinsics<0b000, 0b0100, "uadd8", int_arm_uadd8>;
+def t2USAX : T2I_pam_intrinsics<0b110, 0b0100, "usax", int_arm_usax>;
+def t2USUB16 : T2I_pam_intrinsics<0b101, 0b0100, "usub16", int_arm_usub16>;
+def t2USUB8 : T2I_pam_intrinsics<0b100, 0b0100, "usub8", int_arm_usub8>;
+
+// Signed/Unsigned halving add/subtract
+
+def t2SHASX : T2I_pam_intrinsics<0b010, 0b0010, "shasx", int_arm_shasx>;
+def t2SHADD16 : T2I_pam_intrinsics<0b001, 0b0010, "shadd16", int_arm_shadd16>;
+def t2SHADD8 : T2I_pam_intrinsics<0b000, 0b0010, "shadd8", int_arm_shadd8>;
+def t2SHSAX : T2I_pam_intrinsics<0b110, 0b0010, "shsax", int_arm_shsax>;
+def t2SHSUB16 : T2I_pam_intrinsics<0b101, 0b0010, "shsub16", int_arm_shsub16>;
+def t2SHSUB8 : T2I_pam_intrinsics<0b100, 0b0010, "shsub8", int_arm_shsub8>;
+def t2UHASX : T2I_pam_intrinsics<0b010, 0b0110, "uhasx", int_arm_uhasx>;
+def t2UHADD16 : T2I_pam_intrinsics<0b001, 0b0110, "uhadd16", int_arm_uhadd16>;
+def t2UHADD8 : T2I_pam_intrinsics<0b000, 0b0110, "uhadd8", int_arm_uhadd8>;
+def t2UHSAX : T2I_pam_intrinsics<0b110, 0b0110, "uhsax", int_arm_uhsax>;
+def t2UHSUB16 : T2I_pam_intrinsics<0b101, 0b0110, "uhsub16", int_arm_uhsub16>;
+def t2UHSUB8 : T2I_pam_intrinsics<0b100, 0b0110, "uhsub8", int_arm_uhsub8>;
// Helper class for disassembly only
// A6.3.16 & A6.3.17
@@ -2220,96 +2273,94 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
// Unsigned Sum of Absolute Differences [and Accumulate].
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
- NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (int_arm_usad8 rGPR:$Rn, rGPR:$Rm))]>,
Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
- "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (int_arm_usada8 rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>,
Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
-class T2SatI<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+let hasSideEffects = 1 in
+class T2SatI<dag iops, string opc, string asm>
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> {
bits<4> Rd;
bits<4> Rn;
bits<5> sat_imm;
- bits<7> sh;
+ bits<6> sh;
- let Inst{11-8} = Rd;
+ let Inst{31-24} = 0b11110011;
+ let Inst{21} = sh{5};
+ let Inst{20} = 0;
let Inst{19-16} = Rn;
- let Inst{4-0} = sat_imm;
- let Inst{21} = sh{5};
+ let Inst{15} = 0;
let Inst{14-12} = sh{4-2};
- let Inst{7-6} = sh{1-0};
+ let Inst{11-8} = Rd;
+ let Inst{7-6} = sh{1-0};
+ let Inst{5} = 0;
+ let Inst{4-0} = sat_imm;
}
-def t2SSAT: T2SatI<
- (outs rGPR:$Rd),
- (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
- NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
+def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ "ssat", "\t$Rd, $sat_imm, $Rn$sh">,
+ Requires<[IsThumb2]> {
+ let Inst{23-22} = 0b00;
let Inst{5} = 0;
}
-def t2SSAT16: T2SatI<
- (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
- "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
- let Inst{14-12} = 0b000; // imm3 = '000'
- let Inst{7-6} = 0b00; // imm2 = '00'
- let Inst{5-4} = 0b00;
+def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
+ "ssat16", "\t$Rd, $sat_imm, $Rn">,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{23-22} = 0b00;
+ let sh = 0b100000;
+ let Inst{4} = 0;
}
-def t2USAT: T2SatI<
- (outs rGPR:$Rd),
- (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
- NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
+def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ "usat", "\t$Rd, $sat_imm, $Rn$sh">,
+ Requires<[IsThumb2]> {
+ let Inst{23-22} = 0b10;
}
-def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
- NoItinerary,
- "usat16", "\t$Rd, $sat_imm, $Rn", []>,
+def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
+ "usat16", "\t$Rd, $sat_imm, $Rn">,
Requires<[IsThumb2, HasDSP]> {
- let Inst{31-22} = 0b1111001110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
- let Inst{14-12} = 0b000; // imm3 = '000'
- let Inst{7-6} = 0b00; // imm2 = '00'
- let Inst{5-4} = 0b00;
+ let Inst{23-22} = 0b10;
+ let sh = 0b100000;
+ let Inst{4} = 0;
}
-def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
-def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
(t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
+def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos),
+ (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos),
+ (t2SSAT16 imm1_16:$pos, GPR:$a)>;
+def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos),
+ (t2USAT16 imm0_15:$pos, GPR:$a)>;
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
-defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>;
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm1_31, shl>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>;
+// LSL #0 is actually MOV, and has slightly different permitted registers to
+// LSL with non-zero shift
+def : t2InstAlias<"lsl${s}${p} $Rd, $Rm, #0",
+ (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"lsl${s}${p}.w $Rd, $Rm, #0",
+ (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
(t2RORrr rGPR:$lhs, rGPR:$rhs)>;
@@ -2547,7 +2598,8 @@ def : T2Pat<(t2_so_imm_not:$src),
let isCommutable = 1 in
def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"mul", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2558,7 +2610,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, UseMulOps]> {
+ Requires<[IsThumb2, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2575,8 +2628,12 @@ def t2MLS: T2FourRegMLA<0b0001, "mls",
// Extra precision multiplies with low / high results
let hasSideEffects = 0 in {
let isCommutable = 1 in {
-def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>;
-def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>;
+def t2SMULL : T2MulLong<0b000, 0b0000, "smull",
+ [(set rGPR:$RdLo, rGPR:$RdHi,
+ (smullohi rGPR:$Rn, rGPR:$Rm))]>;
+def t2UMULL : T2MulLong<0b010, 0b0000, "umull",
+ [(set rGPR:$RdLo, rGPR:$RdHi,
+ (umullohi rGPR:$Rn, rGPR:$Rm))]>;
} // isCommutable
// Multiply + accumulate
@@ -2592,7 +2649,8 @@ class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern>
: T2ThreeReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
opc, "\t$Rd, $Rn, $Rm", pattern>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2607,7 +2665,8 @@ class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2624,7 +2683,8 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc,
"\t$Rd, $Rn, $Rm", pattern>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2645,8 +2705,10 @@ def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
[(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16))))]>;
-def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
-def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
+def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
+ [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
+def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
+ [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
(t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
@@ -2654,12 +2716,25 @@ def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
(t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
(t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm),
+ (t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm),
+ (t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smultb rGPR:$Rn, rGPR:$Rm),
+ (t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smultt rGPR:$Rn, rGPR:$Rm),
+ (t2SMULTT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulwb rGPR:$Rn, rGPR:$Rm),
+ (t2SMULWB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulwt rGPR:$Rn, rGPR:$Rm),
+ (t2SMULWT rGPR:$Rn, rGPR:$Rm)>;
class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2680,8 +2755,10 @@ def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
[(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>;
-def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
-def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
+def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwt rGPR:$Rn, rGPR:$Rm)))]>;
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
@@ -2692,58 +2769,93 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra,
(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
- : T2FourReg_mac<1, op22_20, op7_4,
- (outs rGPR:$Ra, rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
+def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLATT GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLAWT GPR:$a, GPR:$b, GPR:$acc)>;
// Halfword multiple accumulate long: SMLAL<x><y>
-def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;
-def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;
-def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;
-def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;
-
-class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
+def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">,
+ Requires<[IsThumb2, HasDSP]>;
+
+def : Thumb2DSPPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALBB $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALBT $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALTB $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>;
+
+class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc,
+ Intrinsic intrinsic>
: T2ThreeReg_mac<0, op22_20, op7_4,
(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{15-12} = 0b1111;
}
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">;
-def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">;
-def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">;
-def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">;
+def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad", int_arm_smuad>;
+def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx", int_arm_smuadx>;
+def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd", int_arm_smusd>;
+def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx", int_arm_smusdx>;
-class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc>
+class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc,
+ Intrinsic intrinsic>
: T2FourReg_mac<0, op22_20, op7_4,
(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra),
- IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>,
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>,
Requires<[IsThumb2, HasDSP]>;
-def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">;
-def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">;
-def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">;
-def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">;
+def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad", int_arm_smlad>;
+def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx", int_arm_smladx>;
+def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd", int_arm_smlsd>;
+def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx", int_arm_smlsdx>;
class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc>
: T2FourReg_mac<1, op22_20, op7_4,
(outs rGPR:$Ra, rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
+ RegConstraint<"$Ra = $RLo, $Rd = $RHi">,
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">;
def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">;
def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">;
def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
+def : Thumb2DSPPat<(ARMSmlald rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLALD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+def : Thumb2DSPPat<(ARMSmlaldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLALDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+def : Thumb2DSPPat<(ARMSmlsld rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLSLD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+def : Thumb2DSPPat<(ARMSmlsldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLSLDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+
//===----------------------------------------------------------------------===//
// Division Instructions.
// Signed and unsigned division on v7-M
@@ -2751,7 +2863,8 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
+ Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -2762,7 +2875,8 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
+ Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -2819,7 +2933,7 @@ def t2PKHBT : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
(and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]>,
+ Requires<[HasDSP, IsThumb2]>,
Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2835,10 +2949,10 @@ def t2PKHBT : T2ThreeReg<
// Alternate cases for PKHBT where identities eliminate some nodes.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
(t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
@@ -2848,7 +2962,7 @@ def t2PKHTB : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
(and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]>,
+ Requires<[HasDSP, IsThumb2]>,
Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2867,14 +2981,14 @@ def t2PKHTB : T2ThreeReg<
// pkhtb src1, src2, asr (17..31).
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
(and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
//===----------------------------------------------------------------------===//
// CRC32 Instructions
@@ -3380,7 +3494,8 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br,
let AsmMatchConverter = "cvtThumbBranches";
}
-let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
+let Size = 4, isNotDuplicable = 1, isBranch = 1, isTerminator = 1,
+ isBarrier = 1, isIndirectBranch = 1 in {
// available in both v8-M.Baseline and Thumb2 targets
def t2BR_JT : t2basePseudoInst<(outs),
@@ -4216,13 +4331,13 @@ def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
Requires<[IsThumb2]>;
def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
(t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
}
def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
@@ -4231,10 +4346,10 @@ def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
Requires<[IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
(t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// Atomic load/store patterns
def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
@@ -4325,26 +4440,26 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
pred:$p, cc_out:$s)>;
// add w/ negative immediates is just a sub.
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${p} $Rd, $Rn, $imm",
(t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+def : t2InstSubst<"add${s}${p} $Rdn, $imm",
(t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"add${p} $Rdn, $imm",
+def : t2InstSubst<"add${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"addw${p} $Rd, $Rn, $imm",
(t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+def : t2InstSubst<"add${s}${p}.w $Rdn, $imm",
(t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"addw${p} $Rdn, $imm",
+def : t2InstSubst<"addw${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
@@ -4431,10 +4546,10 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
// input operands swapped when the shift amount is zero (i.e., unspecified).
def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
(t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
(t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// PUSH/POP aliases for STM/LDM
def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
@@ -4513,16 +4628,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr",
// Extend instruction optional rotate operand.
def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
(t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
(t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
(t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtb16${p} $Rd, $Rm",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm",
(t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
@@ -4535,16 +4650,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
(t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
(t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
(t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtb16${p} $Rd, $Rm",
(t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"uxtb${p} $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
@@ -4560,7 +4675,7 @@ def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
(t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
(t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
(t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
@@ -4568,41 +4683,54 @@ def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
(t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
-def : t2InstAlias<"mov${p} $Rd, $imm",
+def : t2InstSubst<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
-def : t2InstAlias<"mvn${p} $Rd, $imm",
- (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstSubst<"mvn${s}${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
// Same for AND <--> BIC
-def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"bic${s}${p} $Rd, $Rn, $imm",
(t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+def : t2InstSubst<"bic${s}${p} $Rdn, $imm",
(t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm",
(t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+def : t2InstSubst<"and${s}${p} $Rdn, $imm",
(t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
+// And ORR <--> ORN
+def : t2InstSubst<"orn${s}${p} $Rd, $Rn, $imm",
+ (t2ORRri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstSubst<"orn${s}${p} $Rdn, $imm",
+ (t2ORRri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstSubst<"orr${s}${p} $Rd, $Rn, $imm",
+ (t2ORNri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstSubst<"orr${s}${p} $Rdn, $imm",
+ (t2ORNri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"add${s}${p} $Rd, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via t2_so_imm_neg
-def : t2InstAlias<"cmp${p} $Rd, $imm",
+def : t2InstSubst<"cmp${p} $Rd, $imm",
(t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
-def : t2InstAlias<"cmn${p} $Rd, $imm",
+def : t2InstSubst<"cmn${p} $Rd, $imm",
(t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
@@ -4616,6 +4744,8 @@ def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
// these, unfortunately.
+// FIXME: LSL #0 in the shift should allow SP to be used as either the
+// source or destination (but not both).
def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
(ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
@@ -4626,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
(ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+// Aliases for the above with the .w qualifier
+def : t2InstAlias<"mov${p}.w $Rd, $shift",
+ (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def : t2InstAlias<"movs${p}.w $Rd, $shift",
+ (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def : t2InstAlias<"mov${p}.w $Rd, $shift",
+ (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def : t2InstAlias<"movs${p}.w $Rd, $shift",
+ (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
// ADR w/o the .w suffix
def : t2InstAlias<"adr${p} $Rd, $addr",
(t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
@@ -4659,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm",
// Pseudo instruction ldr Rt, =immediate
def t2LDRConstPool
: t2AsmPseudo<"ldr${p} $Rt, $immediate",
- (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
+ (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
// Version w/ the .w suffix.
def : t2InstAlias<"ldr${p}.w $Rt, $immediate",
(t2LDRConstPool GPRnopc:$Rt,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e990486..5d887c4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -11,14 +11,17 @@
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
+def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, f64>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -336,13 +339,15 @@ let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -352,19 +357,22 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDH : AHbI<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]>{
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -374,37 +382,43 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPDIV64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -414,17 +428,20 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULH : AHbI<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -433,7 +450,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
@@ -501,12 +519,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -517,17 +535,15 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
[]>;
-
-// FIXME: Verify encoding after integrated assembler is working.
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
- [/* For disassembly only; pattern left blank */]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -566,7 +582,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -574,7 +590,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd)]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -591,11 +607,10 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
let Inst{5} = 0;
}
-// FIXME: Verify encoding after integrated assembler is working.
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -603,7 +618,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
- [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
+ [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Dd;
bits<5> Sm;
@@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (fpround DPR:$Dm))]> {
+ [(set SPR:$Sd, (fpround DPR:$Dm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -663,31 +680,35 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half, single and double-precision. For disassembly only.
-// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sm;
@@ -946,12 +967,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
- [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPSQRT64]>;
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
- [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
+ Sched<[WriteFPSQRT32]>;
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
@@ -987,7 +1010,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
def VMOVRS : AVConv2I<0b11100001, 0b1010,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
- [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1010,7 +1034,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
- Requires<[HasVFP2, UseVMOVSR]> {
+ Requires<[HasVFP2, UseVMOVSR]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1032,7 +1057,8 @@ let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
- [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1059,7 +1085,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
bits<5> src1;
bits<4> Rt;
bits<4> Rt2;
@@ -1085,7 +1112,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
- [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1128,7 +1156,8 @@ let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> dst1;
bits<4> src1;
@@ -1154,7 +1183,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1173,7 +1203,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1254,7 +1285,8 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
@@ -1269,7 +1301,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1286,14 +1319,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1308,7 +1343,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1325,7 +1361,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1390,7 +1427,8 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1405,7 +1443,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1423,14 +1462,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1445,7 +1486,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1463,52 +1505,58 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
-// FIXME: Verify encoding after integrated assembler is working.
def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
}
@@ -1528,8 +1576,7 @@ let Constraints = "$a = $dst" in {
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
@@ -1540,8 +1587,7 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
@@ -1553,26 +1599,31 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1604,45 +1655,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
// Fixed-Point to FP:
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1650,7 +1710,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1658,7 +1719,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1666,7 +1728,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1674,19 +1737,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
} // End of 'let Constraints = "$a = $dst" in'
@@ -1700,7 +1767,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1708,7 +1776,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1734,7 +1803,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1742,7 +1812,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1768,7 +1839,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1776,7 +1848,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1802,14 +1875,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1838,7 +1913,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1846,7 +1922,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1856,7 +1933,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0,
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
@@ -1880,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1888,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1898,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0,
IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
@@ -1929,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1937,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1947,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
@@ -1978,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1995,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 2bdbe4f..faed6b8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -11,25 +11,112 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#include "ARMInstructionSelector.h"
#include "ARMRegisterBankInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "arm-isel"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
#error "You shouldn't build this"
#endif
-ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI,
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class ARMInstructionSelector : public InstructionSelector {
+public:
+ ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+
+private:
+ bool selectImpl(MachineInstr &I) const;
+
+ struct CmpConstants;
+ struct InsertInfo;
+
+ bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+
+ // Helper for inserting a comparison sequence that sets \p ResReg to either 1
+ // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or
+ // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS).
+ bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg,
+ ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg,
+ unsigned PrevRes) const;
+
+ // Set \p DestReg to \p Constant.
+ void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const;
+
+ bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const;
+
+ // Check if the types match and both operands have the expected size and
+ // register bank.
+ bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS,
+ unsigned ExpectedSize, unsigned ExpectedRegBankID) const;
+
+ // Check if the register has the expected size and register bank.
+ bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const;
+
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMBaseTargetMachine &TM;
+ const ARMRegisterBankInfo &RBI;
+ const ARMSubtarget &STI;
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+// We declare the temporaries used by selectImpl() in the class to minimize the
+// cost of constructing placeholder values.
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+} // end anonymous namespace
+
+namespace llvm {
+InstructionSelector *
+createARMInstructionSelector(const ARMBaseTargetMachine &TM,
+ const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI) {
+ return new ARMInstructionSelector(TM, STI, RBI);
+}
+}
+
+unsigned zero_reg = 0;
+
+#define GET_GLOBALISEL_IMPL
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM,
+ const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI)
: InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+ TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
@@ -43,20 +130,21 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
assert(RegBank && "Can't get reg bank for virtual register");
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
- (void)DstSize;
- unsigned SrcReg = I.getOperand(1).getReg();
- const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
- (void)SrcSize;
- assert((DstSize == SrcSize ||
- // Copies are a means to setup initial types, the number of
- // bits may not exactly match.
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- DstSize <= SrcSize)) &&
- "Copy with different width?!");
-
- assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank");
+ assert((RegBank->getID() == ARM::GPRRegBankID ||
+ RegBank->getID() == ARM::FPRRegBankID) &&
+ "Unsupported reg bank");
+
const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ if (RegBank->getID() == ARM::FPRRegBankID) {
+ if (DstSize == 32)
+ RC = &ARM::SPRRegClass;
+ else if (DstSize == 64)
+ RC = &ARM::DPRRegClass;
+ else
+ llvm_unreachable("Unsupported destination size");
+ }
+
// No need to constrain SrcReg. It will get constrained when
// we hit another of its uses or its defs.
// Copies do not have constraints.
@@ -68,6 +156,375 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return true;
}
+static bool selectMergeValues(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
+
+ // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
+ // into one DPR.
+ unsigned VReg0 = MIB->getOperand(0).getReg();
+ (void)VReg0;
+ assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_MERGE_VALUES");
+ unsigned VReg1 = MIB->getOperand(1).getReg();
+ (void)VReg1;
+ assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_MERGE_VALUES");
+ unsigned VReg2 = MIB->getOperand(2).getReg();
+ (void)VReg2;
+ assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_MERGE_VALUES");
+
+ MIB->setDesc(TII.get(ARM::VMOVDRR));
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
+
+static bool selectUnmergeValues(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
+
+ // We only support G_UNMERGE_VALUES as a way to break up one DPR into two
+ // GPRs.
+ unsigned VReg0 = MIB->getOperand(0).getReg();
+ (void)VReg0;
+ assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_UNMERGE_VALUES");
+ unsigned VReg1 = MIB->getOperand(1).getReg();
+ (void)VReg1;
+ assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_UNMERGE_VALUES");
+ unsigned VReg2 = MIB->getOperand(2).getReg();
+ (void)VReg2;
+ assert(MRI.getType(VReg2).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_UNMERGE_VALUES");
+
+ MIB->setDesc(TII.get(ARM::VMOVRRD));
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
+
+/// Select the opcode for simple extensions (that translate to a single SXT/UXT
+/// instruction). Extension operations more complicated than that should not
+/// invoke this. Returns the original opcode if it doesn't know how to select a
+/// better one.
+static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
+ using namespace TargetOpcode;
+
+ if (Size != 8 && Size != 16)
+ return Opc;
+
+ if (Opc == G_SEXT)
+ return Size == 8 ? ARM::SXTB : ARM::SXTH;
+
+ if (Opc == G_ZEXT)
+ return Size == 8 ? ARM::UXTB : ARM::UXTH;
+
+ return Opc;
+}
+
+/// Select the opcode for simple loads and stores. For types smaller than 32
+/// bits, the value will be zero extended. Returns the original opcode if it
+/// doesn't know how to select a better one.
+static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
+ unsigned Size) {
+ bool isStore = Opc == TargetOpcode::G_STORE;
+
+ if (RegBank == ARM::GPRRegBankID) {
+ switch (Size) {
+ case 1:
+ case 8:
+ return isStore ? ARM::STRBi12 : ARM::LDRBi12;
+ case 16:
+ return isStore ? ARM::STRH : ARM::LDRH;
+ case 32:
+ return isStore ? ARM::STRi12 : ARM::LDRi12;
+ default:
+ return Opc;
+ }
+ }
+
+ if (RegBank == ARM::FPRRegBankID) {
+ switch (Size) {
+ case 32:
+ return isStore ? ARM::VSTRS : ARM::VLDRS;
+ case 64:
+ return isStore ? ARM::VSTRD : ARM::VLDRD;
+ default:
+ return Opc;
+ }
+ }
+
+ return Opc;
+}
+
+// When lowering comparisons, we sometimes need to perform two compares instead
+// of just one. Get the condition codes for both comparisons. If only one is
+// needed, the second member of the pair is ARMCC::AL.
+static std::pair<ARMCC::CondCodes, ARMCC::CondCodes>
+getComparePreds(CmpInst::Predicate Pred) {
+ std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL};
+ switch (Pred) {
+ case CmpInst::FCMP_ONE:
+ Preds = {ARMCC::GT, ARMCC::MI};
+ break;
+ case CmpInst::FCMP_UEQ:
+ Preds = {ARMCC::EQ, ARMCC::VS};
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ Preds.first = ARMCC::EQ;
+ break;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ Preds.first = ARMCC::GT;
+ break;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ Preds.first = ARMCC::GE;
+ break;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ Preds.first = ARMCC::HI;
+ break;
+ case CmpInst::FCMP_OLT:
+ Preds.first = ARMCC::MI;
+ break;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ Preds.first = ARMCC::LS;
+ break;
+ case CmpInst::FCMP_ORD:
+ Preds.first = ARMCC::VC;
+ break;
+ case CmpInst::FCMP_UNO:
+ Preds.first = ARMCC::VS;
+ break;
+ case CmpInst::FCMP_UGE:
+ Preds.first = ARMCC::PL;
+ break;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ Preds.first = ARMCC::LT;
+ break;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ Preds.first = ARMCC::LE;
+ break;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ Preds.first = ARMCC::NE;
+ break;
+ case CmpInst::ICMP_UGE:
+ Preds.first = ARMCC::HS;
+ break;
+ case CmpInst::ICMP_ULT:
+ Preds.first = ARMCC::LO;
+ break;
+ default:
+ break;
+ }
+ assert(Preds.first != ARMCC::AL && "No comparisons needed?");
+ return Preds;
+}
+
+struct ARMInstructionSelector::CmpConstants {
+ CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank,
+ unsigned OpSize)
+ : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode),
+ OperandRegBankID(OpRegBank), OperandSize(OpSize) {}
+
+ // The opcode used for performing the comparison.
+ const unsigned ComparisonOpcode;
+
+ // The opcode used for reading the flags set by the comparison. May be
+ // ARM::INSTRUCTION_LIST_END if we don't need to read the flags.
+ const unsigned ReadFlagsOpcode;
+
+ // The assumed register bank ID for the operands.
+ const unsigned OperandRegBankID;
+
+ // The assumed size in bits for the operands.
+ const unsigned OperandSize;
+};
+
+struct ARMInstructionSelector::InsertInfo {
+ InsertInfo(MachineInstrBuilder &MIB)
+ : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())),
+ DbgLoc(MIB->getDebugLoc()) {}
+
+ MachineBasicBlock &MBB;
+ const MachineBasicBlock::instr_iterator InsertBefore;
+ const DebugLoc &DbgLoc;
+};
+
+void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg,
+ unsigned Constant) const {
+ (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi))
+ .addDef(DestReg)
+ .addImm(Constant)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+}
+
+bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI,
+ unsigned LHSReg, unsigned RHSReg,
+ unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const {
+ return MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
+ validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) &&
+ validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID);
+}
+
+bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg,
+ unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const {
+ if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) {
+ DEBUG(dbgs() << "Unexpected size for register");
+ return false;
+ }
+
+ if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) {
+ DEBUG(dbgs() << "Unexpected register bank for register");
+ return false;
+ }
+
+ return true;
+}
+
+bool ARMInstructionSelector::selectCmp(CmpConstants Helper,
+ MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ const InsertInfo I(MIB);
+
+ auto ResReg = MIB->getOperand(0).getReg();
+ if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID))
+ return false;
+
+ auto Cond =
+ static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate());
+ if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) {
+ putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0);
+ MIB->eraseFromParent();
+ return true;
+ }
+
+ auto LHSReg = MIB->getOperand(2).getReg();
+ auto RHSReg = MIB->getOperand(3).getReg();
+ if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize,
+ Helper.OperandRegBankID))
+ return false;
+
+ auto ARMConds = getComparePreds(Cond);
+ auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ putConstant(I, ZeroReg, 0);
+
+ if (ARMConds.second == ARMCC::AL) {
+ // Simple case, we only need one comparison and we're done.
+ if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg,
+ ZeroReg))
+ return false;
+ } else {
+ // Not so simple, we need two successive comparisons.
+ auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg,
+ RHSReg, ZeroReg))
+ return false;
+ if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg,
+ IntermediateRes))
+ return false;
+ }
+
+ MIB->eraseFromParent();
+ return true;
+}
+
+bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I,
+ unsigned ResReg,
+ ARMCC::CondCodes Cond,
+ unsigned LHSReg, unsigned RHSReg,
+ unsigned PrevRes) const {
+ // Perform the comparison.
+ auto CmpI =
+ BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode))
+ .addUse(LHSReg)
+ .addUse(RHSReg)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Read the comparison flags (if necessary).
+ if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) {
+ auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc,
+ TII.get(Helper.ReadFlagsOpcode))
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI))
+ return false;
+ }
+
+ // Select either 1 or the previous result based on the value of the flags.
+ auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi))
+ .addDef(ResReg)
+ .addUse(PrevRes)
+ .addImm(1)
+ .add(predOps(Cond, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
+ return false;
+
+ return true;
+}
+
+bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ auto &MBB = *MIB->getParent();
+ auto InsertBefore = std::next(MIB->getIterator());
+ auto &DbgLoc = MIB->getDebugLoc();
+
+ // Compare the condition to 0.
+ auto CondReg = MIB->getOperand(1).getReg();
+ assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) &&
+ "Unsupported types for select operation");
+ auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri))
+ .addUse(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Move a value into the result register based on the result of the
+ // comparison.
+ auto ResReg = MIB->getOperand(0).getReg();
+ auto TrueReg = MIB->getOperand(2).getReg();
+ auto FalseReg = MIB->getOperand(3).getReg();
+ assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) &&
+ validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) &&
+ "Unsupported types for select operation");
+ auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr))
+ .addDef(ResReg)
+ .addUse(TrueReg)
+ .addUse(FalseReg)
+ .add(predOps(ARMCC::EQ, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
+ return false;
+
+ MIB->eraseFromParent();
+ return true;
+}
+
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -83,24 +540,211 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
return true;
}
+ if (selectImpl(I))
+ return true;
+
MachineInstrBuilder MIB{MF, I};
+ bool isSExt = false;
using namespace TargetOpcode;
switch (I.getOpcode()) {
- case G_ADD:
+ case G_SEXT:
+ isSExt = true;
+ LLVM_FALLTHROUGH;
+ case G_ZEXT: {
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ // FIXME: Smaller destination sizes coming soon!
+ if (DstTy.getSizeInBits() != 32) {
+ DEBUG(dbgs() << "Unsupported destination size for extension");
+ return false;
+ }
+
+ LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned SrcSize = SrcTy.getSizeInBits();
+ switch (SrcSize) {
+ case 1: {
+ // ZExt boils down to & 0x1; for SExt we also subtract that from 0
+ I.setDesc(TII.get(ARM::ANDri));
+ MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
+
+ if (isSExt) {
+ unsigned SExtResult = I.getOperand(0).getReg();
+
+ // Use a new virtual register for the result of the AND
+ unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ I.getOperand(0).setReg(AndResult);
+
+ auto InsertBefore = std::next(I.getIterator());
+ auto SubI =
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
+ .addDef(SExtResult)
+ .addUse(AndResult)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
+ return false;
+ }
+ break;
+ }
+ case 8:
+ case 16: {
+ unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
+ if (NewOpc == I.getOpcode())
+ return false;
+ I.setDesc(TII.get(NewOpc));
+ MIB.addImm(0).add(predOps(ARMCC::AL));
+ break;
+ }
+ default:
+ DEBUG(dbgs() << "Unsupported source size for extension");
+ return false;
+ }
+ break;
+ }
+ case G_ANYEXT:
+ case G_TRUNC: {
+ // The high bits are undefined, so there's nothing special to do, just
+ // treat it as a copy.
+ auto SrcReg = I.getOperand(1).getReg();
+ auto DstReg = I.getOperand(0).getReg();
+
+ const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+
+ if (SrcRegBank.getID() != DstRegBank.getID()) {
+ DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n");
+ return false;
+ }
+
+ if (SrcRegBank.getID() != ARM::GPRRegBankID) {
+ DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n");
+ return false;
+ }
+
+ I.setDesc(TII.get(COPY));
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ }
+ case G_SELECT:
+ return selectSelect(MIB, MRI);
+ case G_ICMP: {
+ CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
+ ARM::GPRRegBankID, 32);
+ return selectCmp(Helper, MIB, MRI);
+ }
+ case G_FCMP: {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP");
+
+ unsigned OpReg = I.getOperand(2).getReg();
+ unsigned Size = MRI.getType(OpReg).getSizeInBits();
+
+ if (Size == 64 && TII.getSubtarget().isFPOnlySP()) {
+ DEBUG(dbgs() << "Subtarget only supports single precision");
+ return false;
+ }
+ if (Size != 32 && Size != 64) {
+ DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
+ return false;
+ }
+
+ CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
+ ARM::FPRRegBankID, Size);
+ return selectCmp(Helper, MIB, MRI);
+ }
+ case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
- AddDefaultCC(AddDefaultPred(MIB));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
case G_FRAME_INDEX:
// Add 0 to the given frame index and hope it will eventually be folded into
// the user(s).
I.setDesc(TII.get(ARM::ADDri));
- AddDefaultCC(AddDefaultPred(MIB.addImm(0)));
+ MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ case G_CONSTANT: {
+ unsigned Reg = I.getOperand(0).getReg();
+
+ if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID))
+ return false;
+
+ I.setDesc(TII.get(ARM::MOVi));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+
+ auto &Val = I.getOperand(1);
+ if (Val.isCImm()) {
+ if (Val.getCImm()->getBitWidth() > 32)
+ return false;
+ Val.ChangeToImmediate(Val.getCImm()->getZExtValue());
+ }
+
+ if (!Val.isImm()) {
+ return false;
+ }
+
+ break;
+ }
+ case G_STORE:
+ case G_LOAD: {
+ const auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
+ unsigned Reg = I.getOperand(0).getReg();
+ unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();
+
+ LLT ValTy = MRI.getType(Reg);
+ const auto ValSize = ValTy.getSizeInBits();
+
+ assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) &&
+ "Don't know how to load/store 64-bit value without VFP");
+
+ const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
+ if (NewOpc == G_LOAD || NewOpc == G_STORE)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
+ // LDRH has a funny addressing mode (there's already a FIXME for it).
+ MIB.addReg(0);
+ MIB.addImm(0).add(predOps(ARMCC::AL));
break;
- case G_LOAD:
- I.setDesc(TII.get(ARM::LDRi12));
- AddDefaultPred(MIB.addImm(0));
+ }
+ case G_MERGE_VALUES: {
+ if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
+ return false;
+ break;
+ }
+ case G_UNMERGE_VALUES: {
+ if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
+ return false;
break;
+ }
+ case G_BRCOND: {
+ if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
+ DEBUG(dbgs() << "Unsupported condition register for G_BRCOND");
+ return false;
+ }
+
+ // Set the flags.
+ auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
+ .addReg(I.getOperand(0).getReg())
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
+ return false;
+
+ // Branch conditionally.
+ auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
+ .add(I.getOperand(1))
+ .add(predOps(ARMCC::EQ, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h
deleted file mode 100644
index 5072cdd..0000000
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===- ARMInstructionSelector ------------------------------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares the targeting of the InstructionSelector class for ARM.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
-#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
-
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-
-namespace llvm {
-class ARMBaseInstrInfo;
-class ARMBaseRegisterInfo;
-class ARMBaseTargetMachine;
-class ARMRegisterBankInfo;
-class ARMSubtarget;
-
-class ARMInstructionSelector : public InstructionSelector {
-public:
- ARMInstructionSelector(const ARMSubtarget &STI,
- const ARMRegisterBankInfo &RBI);
-
- virtual bool select(MachineInstr &I) const override;
-
-private:
- const ARMBaseInstrInfo &TII;
- const ARMBaseRegisterInfo &TRI;
- const ARMRegisterBankInfo &RBI;
-};
-
-} // End llvm namespace.
-#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 255ea4b..1c17c07 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -12,6 +12,11 @@
//===----------------------------------------------------------------------===//
#include "ARMLegalizerInfo.h"
+#include "ARMCallLowering.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
@@ -23,22 +28,328 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
-ARMLegalizerInfo::ARMLegalizerInfo() {
+static bool AEABI(const ARMSubtarget &ST) {
+ return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI();
+}
+
+ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 32);
+ const LLT s1 = LLT::scalar(1);
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
setAction({G_FRAME_INDEX, p0}, Legal);
- setAction({G_LOAD, s32}, Legal);
- setAction({G_LOAD, 1, p0}, Legal);
+ for (unsigned Op : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s1, s8, s16, s32, p0})
+ setAction({Op, Ty}, Legal);
+ setAction({Op, 1, p0}, Legal);
+ }
+
+ for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) {
+ for (auto Ty : {s1, s8, s16})
+ setAction({Op, Ty}, WidenScalar);
+ setAction({Op, s32}, Legal);
+ }
+
+ for (unsigned Op : {G_SDIV, G_UDIV}) {
+ for (auto Ty : {s8, s16})
+ setAction({Op, Ty}, WidenScalar);
+ if (ST.hasDivideInARMMode())
+ setAction({Op, s32}, Legal);
+ else
+ setAction({Op, s32}, Libcall);
+ }
+
+ for (unsigned Op : {G_SREM, G_UREM}) {
+ for (auto Ty : {s8, s16})
+ setAction({Op, Ty}, WidenScalar);
+ if (ST.hasDivideInARMMode())
+ setAction({Op, s32}, Lower);
+ else if (AEABI(ST))
+ setAction({Op, s32}, Custom);
+ else
+ setAction({Op, s32}, Libcall);
+ }
+
+ for (unsigned Op : {G_SEXT, G_ZEXT}) {
+ setAction({Op, s32}, Legal);
+ for (auto Ty : {s1, s8, s16})
+ setAction({Op, 1, Ty}, Legal);
+ }
+
+ setAction({G_GEP, p0}, Legal);
+ setAction({G_GEP, 1, s32}, Legal);
+
+ setAction({G_SELECT, s32}, Legal);
+ setAction({G_SELECT, p0}, Legal);
+ setAction({G_SELECT, 1, s1}, Legal);
+
+ setAction({G_BRCOND, s1}, Legal);
+
+ setAction({G_CONSTANT, s32}, Legal);
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_CONSTANT, Ty}, WidenScalar);
+
+ setAction({G_ICMP, s1}, Legal);
+ for (auto Ty : {s8, s16})
+ setAction({G_ICMP, 1, Ty}, WidenScalar);
+ for (auto Ty : {s32, p0})
+ setAction({G_ICMP, 1, Ty}, Legal);
- for (auto Ty : {s8, s16, s32})
- setAction({G_ADD, Ty}, Legal);
+ if (!ST.useSoftFloat() && ST.hasVFP2()) {
+ setAction({G_FADD, s32}, Legal);
+ setAction({G_FADD, s64}, Legal);
+
+ setAction({G_LOAD, s64}, Legal);
+ setAction({G_STORE, s64}, Legal);
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Legal);
+ setAction({G_FCMP, 1, s64}, Legal);
+ } else {
+ for (auto Ty : {s32, s64})
+ setAction({G_FADD, Ty}, Libcall);
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Custom);
+ setAction({G_FCMP, 1, s64}, Custom);
+
+ if (AEABI(ST))
+ setFCmpLibcallsAEABI();
+ else
+ setFCmpLibcallsGNU();
+ }
+
+ for (unsigned Op : {G_FREM, G_FPOW})
+ for (auto Ty : {s32, s64})
+ setAction({Op, Ty}, Libcall);
computeTables();
}
+
+void ARMLegalizerInfo::setFCmpLibcallsAEABI() {
+ // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
+ // default-initialized.
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp32Libcalls[CmpInst::FCMP_OEQ] = {
+ {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGE] = {
+ {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGT] = {
+ {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLE] = {
+ {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLT] = {
+ {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UNO] = {
+ {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_ONE] = {
+ {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_UEQ] = {
+ {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
+
+ FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp64Libcalls[CmpInst::FCMP_OEQ] = {
+ {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGE] = {
+ {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGT] = {
+ {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLE] = {
+ {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLT] = {
+ {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UNO] = {
+ {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_ONE] = {
+ {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_UEQ] = {
+ {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
+}
+
+void ARMLegalizerInfo::setFCmpLibcallsGNU() {
+ // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
+ // default-initialized.
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}};
+ FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}};
+ FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}};
+ FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}};
+ FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}};
+ FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}};
+ FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT},
+ {RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ},
+ {RTLIB::UO_F32, CmpInst::ICMP_NE}};
+
+ FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}};
+ FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}};
+ FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}};
+ FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}};
+ FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}};
+ FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}};
+ FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT},
+ {RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ},
+ {RTLIB::UO_F64, CmpInst::ICMP_NE}};
+}
+
+ARMLegalizerInfo::FCmpLibcallsList
+ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
+ unsigned Size) const {
+ assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate");
+ if (Size == 32)
+ return FCmp32Libcalls[Predicate];
+ if (Size == 64)
+ return FCmp64Libcalls[Predicate];
+ llvm_unreachable("Unsupported size for FCmp predicate");
+}
+
+bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ using namespace TargetOpcode;
+
+ MIRBuilder.setInstr(MI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case G_SREM:
+ case G_UREM: {
+ unsigned OriginalResult = MI.getOperand(0).getReg();
+ auto Size = MRI.getType(OriginalResult).getSizeInBits();
+ if (Size != 32)
+ return false;
+
+ auto Libcall =
+ MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+
+ // Our divmod libcalls return a struct containing the quotient and the
+ // remainder. We need to create a virtual register for it.
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ Type *ArgTy = Type::getInt32Ty(Ctx);
+ StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
+ auto RetVal = MRI.createGenericVirtualRegister(
+ getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
+
+ auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
+ {{MI.getOperand(1).getReg(), ArgTy},
+ {MI.getOperand(2).getReg(), ArgTy}});
+ if (Status != LegalizerHelper::Legalized)
+ return false;
+
+ // The remainder is the second result of divmod. Split the return value into
+ // a new, unused register for the quotient and the destination of the
+ // original instruction for the remainder.
+ MIRBuilder.buildUnmerge(
+ {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
+ RetVal);
+ break;
+ }
+ case G_FCMP: {
+ assert(MRI.getType(MI.getOperand(2).getReg()) ==
+ MRI.getType(MI.getOperand(3).getReg()) &&
+ "Mismatched operands for G_FCMP");
+ auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+
+ auto OriginalResult = MI.getOperand(0).getReg();
+ auto Predicate =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ auto Libcalls = getFCmpLibcalls(Predicate, OpSize);
+
+ if (Libcalls.empty()) {
+ assert((Predicate == CmpInst::FCMP_TRUE ||
+ Predicate == CmpInst::FCMP_FALSE) &&
+ "Predicate needs libcalls, but none specified");
+ MIRBuilder.buildConstant(OriginalResult,
+ Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
+ auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+ auto *RetTy = Type::getInt32Ty(Ctx);
+
+ SmallVector<unsigned, 2> Results;
+ for (auto Libcall : Libcalls) {
+ auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ auto Status =
+ createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy},
+ {{MI.getOperand(2).getReg(), ArgTy},
+ {MI.getOperand(3).getReg(), ArgTy}});
+
+ if (Status != LegalizerHelper::Legalized)
+ return false;
+
+ auto ProcessedResult =
+ Libcalls.size() == 1
+ ? OriginalResult
+ : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult));
+
+ // We have a result, but we need to transform it into a proper 1-bit 0 or
+ // 1, taking into account the different peculiarities of the values
+ // returned by the comparison functions.
+ CmpInst::Predicate ResultPred = Libcall.Predicate;
+ if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
+ // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
+ // to keep the types consistent.
+ MIRBuilder.buildTrunc(ProcessedResult, LibcallResult);
+ } else {
+ // We need to compare against 0.
+ assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
+ auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
+ }
+ Results.push_back(ProcessedResult);
+ }
+
+ if (Results.size() != 1) {
+ assert(Results.size() == 2 && "Unexpected number of results");
+ MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]);
+ }
+ break;
+ }
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
index ca3eea8..78ab941 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -14,16 +14,52 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
-class LLVMContext;
+class ARMSubtarget;
/// This class provides the information for the target register banks.
class ARMLegalizerInfo : public LegalizerInfo {
public:
- ARMLegalizerInfo();
+ ARMLegalizerInfo(const ARMSubtarget &ST);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
+private:
+ void setFCmpLibcallsGNU();
+ void setFCmpLibcallsAEABI();
+
+ struct FCmpLibcallInfo {
+ // Which libcall this is.
+ RTLIB::Libcall LibcallID;
+
+ // The predicate to be used when comparing the value returned by the
+ // function with a relevant constant (currently hard-coded to zero). This is
+ // necessary because often the libcall will return e.g. a value greater than
+ // 0 to represent 'true' and anything negative to represent 'false', or
+ // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is
+ // needed, this should be CmpInst::BAD_ICMP_PREDICATE.
+ CmpInst::Predicate Predicate;
+ };
+ using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>;
+
+  // Map from each FCmp predicate to the corresponding libcall infos. An FCmp
+ // instruction may be lowered to one or two libcalls, which is why we need a
+ // list. If two libcalls are needed, their results will be OR'ed.
+ using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>;
+
+ FCmpLibcallsMapTy FCmp32Libcalls;
+ FCmpLibcallsMapTy FCmp64Libcalls;
+
+ // Get the libcall(s) corresponding to \p Predicate for operands of \p Size
+ // bits.
+ FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 48ab491..7a452d4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -33,7 +34,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -517,8 +517,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
+ .add(t1CondCodeOp(true))
+ .addReg(Base)
+ .addImm(WordOffset * 4)
+ .addImm(Pred)
+ .addReg(PredReg);
return;
}
@@ -534,9 +538,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
// information and *always* have to reset at the end of a block.
// See PR21029.
if (MBBI != MBB.end()) --MBBI;
- AddDefaultT1CC(
- BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
+ .add(t1CondCodeOp(true))
+ .addReg(Base)
+ .addImm(WordOffset * 4)
+ .addImm(Pred)
+ .addReg(PredReg);
}
}
@@ -602,13 +609,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
- if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ if (isThumb1 && ContainsReg(Regs, Base)) {
assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
- if (Opcode == ARM::tLDRi) {
+ if (Opcode == ARM::tLDRi)
Writeback = false;
- } else if (Opcode == ARM::tSTRi) {
+ else if (Opcode == ARM::tSTRi)
return nullptr;
- }
}
ARM_AM::AMSubMode Mode = ARM_AM::ia;
@@ -700,8 +706,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
.addReg(Base, getKillRegState(KillOldBase));
} else
BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(KillOldBase))
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .add(predOps(Pred, PredReg));
// The following ADDS/SUBS becomes an update.
Base = NewBase;
@@ -710,17 +716,21 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
if (BaseOpc == ARM::tADDrSPi) {
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset / 4)
+ .add(predOps(Pred, PredReg));
} else
- AddDefaultT1CC(
- BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .add(t1CondCodeOp(true))
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
} else {
BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp());
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
@@ -1259,7 +1269,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Transfer the rest of operands.
for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.addOperand(MI->getOperand(OpNum));
+ MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -1392,14 +1402,19 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg));
}
} else {
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1410,13 +1425,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg));
} else {
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
}
}
MBB.erase(MBBI);
@@ -1462,12 +1482,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
- MIB.addOperand(Reg0Op).addOperand(Reg1Op)
- .addReg(BaseOp.getReg(), RegState::Define);
+ MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
} else {
assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
- MIB.addReg(BaseOp.getReg(), RegState::Define)
- .addOperand(Reg0Op).addOperand(Reg1Op);
+ MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
}
MIB.addReg(BaseOp.getReg(), RegState::Kill)
.addImm(Offset).addImm(Pred).addReg(PredReg);
@@ -1477,7 +1495,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MBB.erase(MBBI);
@@ -1891,8 +1909,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
for (auto Use : Prev->uses())
if (Use.isKill()) {
- AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
- .addReg(Use.getReg(), RegState::Kill))
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill)
+ .add(predOps(ARMCC::AL))
.copyImplicitOps(*MBBI);
MBB.erase(MBBI);
MBB.erase(Prev);
@@ -1942,6 +1961,7 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+ AliasAnalysis *AA;
const DataLayout *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -1955,6 +1975,11 @@ namespace {
return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
private:
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
@@ -1984,6 +2009,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
MF = &Fn;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool Modified = false;
for (MachineBasicBlock &MFI : Fn)
@@ -1997,28 +2023,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator E,
SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ AliasAnalysis *AA) {
// Are there stores / loads / calls between them?
- // FIXME: This is overly conservative. We should make use of alias information
- // some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && I->mayStore())
- return false;
- if (!isLd) {
- if (I->mayLoad())
- return false;
- // It's not safe to move the first 'str' down.
- // str r1, [r0]
- // strh r5, [r0]
- // str r4, [r0, #+4]
- if (I->mayStore())
- return false;
- }
+ if (I->mayStore() || (!isLd && I->mayLoad()))
+ for (MachineInstr *MemOp : MemOps)
+ if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
+ return false;
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
MachineOperand &MO = I->getOperand(j);
if (!MO.isReg())
@@ -2142,33 +2159,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
unsigned LastBytes = 0;
unsigned NumMove = 0;
for (int i = Ops.size() - 1; i >= 0; --i) {
+ // Make sure each operation has the same kind.
MachineInstr *Op = Ops[i];
- unsigned Loc = MI2LocMap[Op];
- if (Loc <= FirstLoc) {
- FirstLoc = Loc;
- FirstOp = Op;
- }
- if (Loc >= LastLoc) {
- LastLoc = Loc;
- LastOp = Op;
- }
-
unsigned LSMOpcode
= getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
if (LastOpcode && LSMOpcode != LastOpcode)
break;
+ // Check that we have a continuous set of offsets.
int Offset = getMemoryOpOffset(*Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
break;
}
+
+ // Don't try to reschedule too many instructions.
+ if (NumMove == 8) // FIXME: Tune this limit.
+ break;
+
+    // Found a mergeable instruction; save information about it.
+ ++NumMove;
LastOffset = Offset;
LastBytes = Bytes;
LastOpcode = LSMOpcode;
- if (++NumMove == 8) // FIXME: Tune this limit.
- break;
+
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
}
if (NumMove <= 1)
@@ -2176,7 +2200,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
else {
SmallPtrSet<MachineInstr*, 4> MemOps;
SmallSet<unsigned, 4> MemRegs;
- for (int i = NumMove-1; i >= 0; --i) {
+ for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
MemOps.insert(Ops[i]);
MemRegs.insert(Ops[i]->getOperand(0).getReg());
}
@@ -2186,7 +2210,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
if (DoMove)
DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
- MemOps, MemRegs, TRI);
+ MemOps, MemRegs, TRI, AA);
if (!DoMove) {
for (unsigned i = 0; i != NumMove; ++i)
Ops.pop_back();
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index 07044b9..48b02d4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -14,23 +14,36 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
-using namespace llvm;
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+using namespace llvm;
MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
const MCSymbol *Symbol) {
+ MCSymbolRefExpr::VariantKind SymbolVariant = MCSymbolRefExpr::VK_None;
+ if (MO.getTargetFlags() & ARMII::MO_SBREL)
+ SymbolVariant = MCSymbolRefExpr::VK_ARM_SBREL;
+
const MCExpr *Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) {
default:
llvm_unreachable("Unknown target flag on symbol operand");
@@ -38,12 +51,12 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
break;
case ARMII::MO_LO16:
Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createLower16(Expr, OutContext);
break;
case ARMII::MO_HI16:
Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createUpper16(Expr, OutContext);
break;
}
@@ -75,11 +88,10 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), OutContext));
break;
- case MachineOperand::MO_GlobalAddress: {
+ case MachineOperand::MO_GlobalAddress:
MCOp = GetSymbolRef(MO,
GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags()));
break;
- }
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO,
GetExternalSymbolSymbol(MO.getSymbolName()));
@@ -141,9 +153,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
break;
}
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
if (AP.lowerOperand(MO, MCOp)) {
if (MCOp.isImm() && EncodeImms) {
@@ -199,11 +209,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
.addImm(ARMCC::AL).addReg(0));
MCInst Noop;
- Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
+ Subtarget->getInstrInfo()->getNoop(Noop);
for (int8_t I = 0; I < NoopsInSledCount; I++)
- {
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
- }
OutStreamer->EmitLabel(Target);
recordSled(CurSled, MI, Kind);
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 50d8f09..e25d36b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
using namespace llvm;
@@ -15,10 +16,4 @@ void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0),
- HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0),
- IsSplitCSR(false), PromotedGlobalsIncrease(0) {}
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 8c485e8..8161167 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -14,11 +14,11 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
-#include "ARMSubtarget.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <utility>
namespace llvm {
@@ -29,42 +29,42 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// isThumb - True if this function is compiled under Thumb mode.
/// Used to initialized Align, so must precede it.
- bool isThumb;
+ bool isThumb = false;
/// hasThumb2 - True if the target architecture supports Thumb2. Do not use
/// to determine if function is compiled under Thumb mode, for that use
/// 'isThumb'.
- bool hasThumb2;
+ bool hasThumb2 = false;
/// StByValParamsPadding - For parameter that is split between
/// GPRs and memory; while recovering GPRs part, when
/// StackAlignment > 4, and GPRs-part-size mod StackAlignment != 0,
/// we need to insert gap before parameter start address. It allows to
/// "attach" GPR-part to the part that was passed via stack.
- unsigned StByValParamsPadding;
+ unsigned StByValParamsPadding = 0;
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
- unsigned ArgRegsSaveSize;
+ unsigned ArgRegsSaveSize = 0;
/// ReturnRegsCount - Number of registers used up in the return.
- unsigned ReturnRegsCount;
+ unsigned ReturnRegsCount = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
- bool HasStackFrame;
+ bool HasStackFrame = false;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
/// emitPrologue.
- bool RestoreSPFromFP;
+ bool RestoreSPFromFP = false;
/// LRSpilledForFarJump - True if the LR register has been for spilled to
/// enable far jump.
- bool LRSpilledForFarJump;
+ bool LRSpilledForFarJump = false;
/// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
/// spill stack offset.
- unsigned FramePtrSpillOffset;
+ unsigned FramePtrSpillOffset = 0;
/// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
/// register spills areas. For Mac OS X:
@@ -77,16 +77,16 @@ class ARMFunctionInfo : public MachineFunctionInfo {
///
/// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3.
/// Some may be spilled after the stack has been realigned.
- unsigned GPRCS1Offset;
- unsigned GPRCS2Offset;
- unsigned DPRCSOffset;
+ unsigned GPRCS1Offset = 0;
+ unsigned GPRCS2Offset = 0;
+ unsigned DPRCSOffset = 0;
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
- unsigned GPRCS1Size;
- unsigned GPRCS2Size;
- unsigned DPRCSAlignGapSize;
- unsigned DPRCSSize;
+ unsigned GPRCS1Size = 0;
+ unsigned GPRCS2Size = 0;
+ unsigned DPRCSAlignGapSize = 0;
+ unsigned DPRCSSize = 0;
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
@@ -95,15 +95,15 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// We do not keep track of the frame indices used for these registers - they
/// behave like any other frame index in the aligned stack frame. These
/// registers also aren't included in DPRCSSize above.
- unsigned NumAlignedDPRCS2Regs;
+ unsigned NumAlignedDPRCS2Regs = 0;
- unsigned PICLabelUId;
+ unsigned PICLabelUId = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// HasITBlocks - True if IT blocks have been inserted.
- bool HasITBlocks;
+ bool HasITBlocks = false;
/// CPEClones - Track constant pool entries clones created by Constant Island
/// pass.
@@ -111,7 +111,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// ArgumentStackSize - amount of bytes on stack consumed by the arguments
/// being passed on the stack
- unsigned ArgumentStackSize;
+ unsigned ArgumentStackSize = 0;
/// CoalescedWeights - mapping of basic blocks to the rolling counter of
/// coalesced weights.
@@ -119,26 +119,16 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
/// Globals that have had their storage promoted into the constant pool.
SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
/// The amount the literal pool has been increasedby due to promoted globals.
- int PromotedGlobalsIncrease;
+ int PromotedGlobalsIncrease = 0;
public:
- ARMFunctionInfo() :
- isThumb(false),
- hasThumb2(false),
- ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false),
- RestoreSPFromFP(false),
- LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
- NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false),
- PromotedGlobalsIncrease(0) {}
+ ARMFunctionInfo() = default;
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -250,6 +240,7 @@ public:
PromotedGlobalsIncrease = Sz;
}
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
new file mode 100644
index 0000000..1b6e97c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -0,0 +1,57 @@
+//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMacroFusion.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ if (ST.hasFuseAES())
+ // Fuse AES crypto operations.
+ switch(SecondOpcode) {
+ // AES encode.
+ case ARM::AESMC :
+ return FirstOpcode == ARM::AESE ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ // AES decode.
+ case ARM::AESIMC:
+ return FirstOpcode == ARM::AESD ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ }
+
+ return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
new file mode 100644
index 0000000..1e4fc66
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
@@ -0,0 +1,24 @@
+//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createARMMacroFusionDAGMutation());
+/// to ARMPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 581d5fe..7e4d598 100644
--- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -88,13 +88,15 @@ bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) {
}
}
}
+ bool Changed = false;
// Remove the tagged DMB
for (auto MI : ToRemove) {
MI->eraseFromParent();
++NumDMBsRemoved;
+ Changed = true;
}
- return NumDMBsRemoved > 0;
+ return Changed;
}
/// createARMOptimizeBarriersPass - Returns an instance of the remove double
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 324087d..8449302 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -13,11 +13,15 @@
#include "ARMRegisterBankInfo.h"
#include "ARMInstrInfo.h" // For the register classes
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_TARGET_REGBANK_IMPL
+#include "ARMGenRegisterBank.inc"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -29,44 +33,109 @@ using namespace llvm;
// into an ARMGenRegisterBankInfo.def (similar to AArch64).
namespace llvm {
namespace ARM {
-const uint32_t GPRCoverageData[] = {
- // Classes 0-31
- (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) |
- (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) |
- (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) |
- (1u << ARM::GPRnopc_and_hGPRRegClassID) |
- (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) |
- (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) |
- (1u << ARM::hGPR_and_tcGPRRegClassID),
- // Classes 32-63
- 0,
- // Classes 64-96
- 0,
- // FIXME: Some of the entries below this point can be safely removed once
- // this is tablegenerated. It's only needed because of the hardcoded
- // register class limit.
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
+enum PartialMappingIdx {
+ PMI_GPR,
+ PMI_SPR,
+ PMI_DPR,
+ PMI_Min = PMI_GPR,
+};
+
+RegisterBankInfo::PartialMapping PartMappings[]{
+ // GPR Partial Mapping
+ {0, 32, GPRRegBank},
+ // SPR Partial Mapping
+ {0, 32, FPRRegBank},
+ // DPR Partial Mapping
+ {0, 64, FPRRegBank},
};
-RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData);
-RegisterBank *RegBanks[] = {&GPRRegBank};
+#ifndef NDEBUG
+static bool checkPartMapping(const RegisterBankInfo::PartialMapping &PM,
+ unsigned Start, unsigned Length,
+ unsigned RegBankID) {
+ return PM.StartIdx == Start && PM.Length == Length &&
+ PM.RegBank->getID() == RegBankID;
+}
+
+static void checkPartialMappings() {
+ assert(
+ checkPartMapping(PartMappings[PMI_GPR - PMI_Min], 0, 32, GPRRegBankID) &&
+ "Wrong mapping for GPR");
+ assert(
+ checkPartMapping(PartMappings[PMI_SPR - PMI_Min], 0, 32, FPRRegBankID) &&
+ "Wrong mapping for SPR");
+ assert(
+ checkPartMapping(PartMappings[PMI_DPR - PMI_Min], 0, 64, FPRRegBankID) &&
+ "Wrong mapping for DPR");
+}
+#endif
-RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank};
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPR3OpsIdx = 1,
+ SPR3OpsIdx = 4,
+ DPR3OpsIdx = 7,
+};
RegisterBankInfo::ValueMapping ValueMappings[] = {
- {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}};
+ // invalid
+ {nullptr, 0},
+ // 3 ops in GPRs
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ // 3 ops in SPRs
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ // 3 ops in DPRs
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1}};
+
+#ifndef NDEBUG
+static bool checkValueMapping(const RegisterBankInfo::ValueMapping &VM,
+ RegisterBankInfo::PartialMapping *BreakDown) {
+ return VM.NumBreakDowns == 1 && VM.BreakDown == BreakDown;
+}
+
+static void checkValueMappings() {
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 1],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 2],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 1],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 2],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 1],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 2],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+}
+#endif
} // end namespace arm
} // end namespace llvm
ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) {
+ : ARMGenRegisterBankInfo() {
static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
@@ -97,6 +166,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
"Subclass not added?");
assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+
+#ifndef NDEBUG
+ ARM::checkPartialMappings();
+ ARM::checkValueMappings();
+#endif
}
const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
@@ -105,8 +179,16 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
switch (RC.getID()) {
case GPRRegClassID:
+ case GPRnopcRegClassID:
+ case GPRspRegClassID:
case tGPR_and_tcGPRRegClassID:
+ case tGPRRegClassID:
return getRegBank(ARM::GPRRegBankID);
+ case SPR_8RegClassID:
+ case SPRRegClassID:
+ case DPR_8RegClassID:
+ case DPRRegClassID:
+ return getRegBank(ARM::FPRRegBankID);
default:
llvm_unreachable("Unsupported register kind");
}
@@ -114,37 +196,163 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
llvm_unreachable("Switch should handle all register classes");
}
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
auto Opc = MI.getOpcode();
// Try the default logic for non-generic instructions that are either copies
// or already have some operands assigned to banks.
if (!isPreISelGenericOpcode(Opc)) {
- InstructionMapping Mapping = getInstrMappingImpl(MI);
+ const InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
}
using namespace TargetOpcode;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumOperands = MI.getNumOperands();
- const ValueMapping *OperandsMapping = &ARM::ValueMappings[0];
+ const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
switch (Opc) {
case G_ADD:
- case G_LOAD:
+ case G_SUB:
+ case G_MUL:
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_SDIV:
+ case G_UDIV:
+ case G_SEXT:
+ case G_ZEXT:
+ case G_ANYEXT:
+ case G_TRUNC:
+ case G_GEP:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
- OperandsMapping = &ARM::ValueMappings[0];
+ OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ break;
+ case G_LOAD:
+ case G_STORE: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OperandsMapping =
+ Ty.getSizeInBits() == 64
+ ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]})
+ : &ARM::ValueMappings[ARM::GPR3OpsIdx];
break;
+ }
+ case G_FADD: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
+ "Unsupported size for G_FADD");
+ OperandsMapping = Ty.getSizeInBits() == 64
+ ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::SPR3OpsIdx];
+ break;
+ }
+ case G_CONSTANT:
case G_FRAME_INDEX:
- OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr});
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+ break;
+ case G_SELECT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ (void)Ty;
+ LLT Ty2 = MRI.getType(MI.getOperand(1).getReg());
+ (void)Ty2;
+ assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT");
+ assert(Ty2.getSizeInBits() == 1 && "Unsupported size for G_SELECT");
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
+ case G_ICMP: {
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+ (void)Ty2;
+ assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP");
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
+ case G_FCMP: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ (void)Ty;
+ LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ (void)Ty2;
+ assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP");
+ assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() &&
+ "Mismatched operand sizes for G_FCMP");
+
+ unsigned Size = Ty1.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ auto FPRValueMapping = Size == 32 ? &ARM::ValueMappings[ARM::SPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::DPR3OpsIdx];
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ FPRValueMapping, FPRValueMapping});
+ break;
+ }
+ case G_MERGE_VALUES: {
+ // We only support G_MERGE_VALUES for creating a double precision floating
+ // point value out of two GPRs.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+ if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
+ Ty2.getSizeInBits() != 32)
+ return getInvalidInstructionMapping();
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
+ case G_UNMERGE_VALUES: {
+ // We only support G_UNMERGE_VALUES for splitting a double precision
+ // floating point value into two GPRs.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+ if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 ||
+ Ty2.getSizeInBits() != 64)
+ return getInvalidInstructionMapping();
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::DPR3OpsIdx]});
+ break;
+ }
+ case G_BR:
+ OperandsMapping = getOperandsMapping({nullptr});
+ break;
+ case G_BRCOND:
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
default:
- return InstructionMapping{};
+ return getInvalidInstructionMapping();
}
- return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping,
- NumOperands};
+#ifndef NDEBUG
+ for (unsigned i = 0; i < NumOperands; i++) {
+ for (const auto &Mapping : OperandsMapping[i]) {
+ assert(
+ (Mapping.RegBank->getID() != ARM::FPRRegBankID ||
+ MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+ "Trying to use floating point register bank on target without vfp");
+ }
+ }
+#endif
+
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping,
+ NumOperands);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
index 773920e..9650b35 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -16,26 +16,28 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#define GET_REGBANK_DECLARATIONS
+#include "ARMGenRegisterBank.inc"
+
namespace llvm {
class TargetRegisterInfo;
-namespace ARM {
-enum {
- GPRRegBankID = 0, // General purpose registers
- NumRegisterBanks,
+class ARMGenRegisterBankInfo : public RegisterBankInfo {
+#define GET_TARGET_REGBANK_CLASS
+#include "ARMGenRegisterBank.inc"
};
-} // end namespace ARM
/// This class provides the information for the target register banks.
-class ARMRegisterBankInfo final : public RegisterBankInfo {
+class ARMRegisterBankInfo final : public ARMGenRegisterBankInfo {
public:
ARMRegisterBankInfo(const TargetRegisterInfo &TRI);
const RegisterBank &
getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
- InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td b/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td
new file mode 100644
index 0000000..7cd2d60
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td
@@ -0,0 +1,14 @@
+//=- ARMRegisterBanks.td - Describe the ARM Banks ------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def GPRRegBank : RegisterBank<"GPRB", [GPR, GPRwithAPSR]>;
+def FPRRegBank : RegisterBank<"FPRB", [SPR, DPR]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 02cbfb1..b10583b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -245,6 +245,10 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
// the general GPR register class above (MOV, e.g.)
def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+// Thumb registers R0-R7 and the PC. Some instructions like TBB or TBH allow
+// the PC to be used as a destination operand as well.
+def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>;
+
// The high registers in thumb mode, R8-R15.
def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
index b7d2d34..53e012f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Instruction scheduling annotations for out-of-order CPUs.
+// Instruction scheduling annotations for in-order and out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
@@ -54,6 +54,9 @@
// }
// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+//===----------------------------------------------------------------------===//
+// Sched definitions for integer pipeline instructions
+//
// Basic ALU operation.
def WriteALU : SchedWrite;
def ReadALU : SchedRead;
@@ -69,29 +72,84 @@ def WriteCMP : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;
-// Division.
-def WriteDiv : SchedWrite;
+// Multiplies.
+def WriteMUL16 : SchedWrite; // 16-bit multiply.
+def WriteMUL32 : SchedWrite; // 32-bit multiply.
+def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg.
+def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg.
+def ReadMUL : SchedRead;
+
+// Multiply-accumulates.
+def WriteMAC16 : SchedWrite; // 16-bit mac.
+def WriteMAC32 : SchedWrite; // 32-bit mac.
+def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg.
+def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg.
+def ReadMAC : SchedRead;
+
+// Divisions.
+def WriteDIV : SchedWrite;
-// Loads.
+// Loads/Stores.
def WriteLd : SchedWrite;
def WritePreLd : SchedWrite;
+def WriteST : SchedWrite;
// Branches.
def WriteBr : SchedWrite;
def WriteBrL : SchedWrite;
def WriteBrTbl : SchedWrite;
-// Fixpoint conversions.
-def WriteCvtFP : SchedWrite;
-
// Noop.
def WriteNoop : SchedWrite;
+//===----------------------------------------------------------------------===//
+// Sched definitions for floating-point and neon instructions
+//
+// Floating point conversions
+def WriteFPCVT : SchedWrite;
+def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
+
+// ALU operations (32/64-bit)
+def WriteFPALU32 : SchedWrite;
+def WriteFPALU64 : SchedWrite;
+
+// Multiplication
+def WriteFPMUL32 : SchedWrite;
+def WriteFPMUL64 : SchedWrite;
+def ReadFPMUL : SchedRead; // multiplier read
+def ReadFPMAC : SchedRead; // accumulator read
+
+// Multiply-accumulate
+def WriteFPMAC32 : SchedWrite;
+def WriteFPMAC64 : SchedWrite;
+
+// Division
+def WriteFPDIV32 : SchedWrite;
+def WriteFPDIV64 : SchedWrite;
+
+// Square-root
+def WriteFPSQRT32 : SchedWrite;
+def WriteFPSQRT64 : SchedWrite;
+
+// Vector load and stores
+def WriteVLD1 : SchedWrite;
+def WriteVLD2 : SchedWrite;
+def WriteVLD3 : SchedWrite;
+def WriteVLD4 : SchedWrite;
+def WriteVST1 : SchedWrite;
+def WriteVST2 : SchedWrite;
+def WriteVST3 : SchedWrite;
+def WriteVST4 : SchedWrite;
+
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
(void)TII;
+ const ARMSubtarget *STI =
+ static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
+ (void)STI;
}]>;
def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
@@ -365,3 +423,5 @@ include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
+include "ARMScheduleA57.td"
+include "ARMScheduleM3.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA57.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA57.td
new file mode 100644
index 0000000..525079d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -0,0 +1,1471 @@
+//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with the
+// much wider out-of-order issue stage, this produced a need to carefully
+// schedule micro-ops so that all three decoded each cycle are successfully
+// issued as the reservation station(s) simply don't stay occupied for long.
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
+def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
+def IsCPSRDefinedAndPredicatedPred :
+ SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;
+
+// Cortex A57 rev. r1p0 or later (false = r0px)
+def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
+
+// If Addrmode3 contains register offset (not immediate)
+def IsLdrAm3RegOffPred :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3RegOffPredX2 :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
+def IsLdrAm3RegOffPredX3 :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;
+
+// If Addrmode3 contains "minus register"
+def IsLdrAm3NegRegOffPred :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3NegRegOffPredX2 :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
+def IsLdrAm3NegRegOffPredX3 :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
+
+// Load, scaled register offset, not plus LSL2
+def IsLdstsoScaledNotOptimalPredX0 :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
+def IsLdstsoScaledNotOptimalPred :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
+def IsLdstsoScaledNotOptimalPredX2 :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdstsoScaledPred :
+ SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
+def IsLdstsoScaledPredX2 :
+ SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;
+
+def IsLdstsoMinusRegPredX0 :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
+def IsLdstsoMinusRegPred :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
+def IsLdstsoMinusRegPredX2 :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdrAm2ScaledPred :
+ SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;
+
+// LDM, base reg in list
+def IsLdmBaseRegInList :
+ SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
+
+class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
+ list <SchedWriteRes> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
+
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// (AArch64SchedA57.td)
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 3; // 3-way decode and dispatch
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling.
+ let LoopMicroOpBufferSize = 16;
+ let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1>; // Type B micro-ops
+def A57UnitI : ProcResource<2>; // Type I micro-ops
+def A57UnitM : ProcResource<1>; // Type M micro-ops
+def A57UnitL : ProcResource<1>; // Type L micro-ops
+def A57UnitS : ProcResource<1>; // Type S micro-ops
+
+def A57UnitX : ProcResource<1>; // Type X micro-ops (F1)
+def A57UnitW : ProcResource<1>; // Type W micro-ops (F0)
+
+let SchedModel = CortexA57Model in {
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "ARMScheduleA57WriteRes.td"
+
+// To have "CompleteModel = 1", support of pseudos and special instructions
+def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
+ "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
+ "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
+ "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
+ "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
+ "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG",
+ "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>;
+
+def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
+
+// Specific memory instrs
+def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
+ "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
+
+// coprocessor moves
+def : InstRW<[WriteNoop, WriteNoop], (instregex
+ "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
+ "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
+ "(t2)?MSR(banked|i|_AR|_M)?$")>;
+
+// Deprecated instructions
+def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
+
+// Pseudos
+def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
+ "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
+ "tLDRpci_pic", "t2SUBS_PC_LR",
+ "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
+ "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+ "WIN__CHKSTK", "WIN__DBZCHK")>;
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
+
+// --- 3.2 Branch Instructions ---
+// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
+
+def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
+ "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
+def : InstRW<[A57Write_1cyc_1B_1I],
+ (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
+// Pseudos
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
+def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
+ "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
+def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
+
+// --- 3.3 Arithmetic and Logical Instructions ---
+// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
+// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
+
+def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+
+// shift by register, conditional or unconditional
+// TODO: according to the doc, conditional uses I0/I1, unconditional uses M.
+// Why does the more complex instruction use the simpler pipeline?
+// This may be an error in the doc.
+def A57WriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57ReadALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>
+]>;
+def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsr, A57WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
+def : SchedAlias<ReadALUsr, A57ReadALUsr>;
+
+def A57WriteCMPsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : SchedAlias<WriteCMP, A57Write_1cyc_1I>;
+def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
+def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
+
+// --- 3.4 Move and Shift Instructions ---
+// Move, basic
+// MOV{S}, MOVW, MVN{S}
+def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
+ "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
+ "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
+
+// Move, shift by immed, setflags/no setflags
+// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
+// setflags = isCPSRDefined
+def A57WriteMOVsi : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
+ "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
+ "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
+
+// shift by register, conditional or unconditional, setflags/no setflags
+def A57WriteMOVsr : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
+ "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
+ "(t2|t)RORrr")>;
+
+// Move, top
+// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
+def A57WriteMOVT : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
+
+def A57WriteI2pc :
+ WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
+def A57WriteI2ld :
+ WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
+def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+// +2cyc for branch forms
+def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
+
+// --- 3.5 Divide and Multiply Instructions ---
+// Divide: SDIV, UDIV
+// latency from documentation: 4-20, maximum taken
+def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
+// Multiply: tMul not bound to common WriteRes types
+def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
+def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
+def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
+def : ReadAdvance<ReadMUL, 0>;
+
+// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
+// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
+// Multiply-accumulate pipelines support late-forwarding of accumulate operands
+// from similar μops, allowing a typical sequence of multiply-accumulate μops
+// to issue one every 1 cycle (sched advance = 2).
+def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
+
+def : SchedAlias<WriteMAC16, A57WriteMLA>;
+def : SchedAlias<WriteMAC32, A57WriteMLA>;
+def : SchedAlias<ReadMAC, A57ReadMLA>;
+
+def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
+def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;
+
+// Multiply long: SMULL, UMULL
+def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
+def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
+
+// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
+// Parallel arith
+// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
+// Conditional GE-setting instructions require three extra μops
+// and two additional cycles to conditionally update the GE field.
+def A57WriteParArith : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]>
+]>;
+def : InstRW< [A57WriteParArith], (instregex
+ "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
+ "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;
+
+// Parallel arith with exchange: SASX, SSAX, UASX, USAX
+// Same predication penalty as above, one extra cycle for the exchange.
+def A57WriteParArithExch : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]>
+]>;
+def : InstRW<[A57WriteParArithExch],
+ (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;
+
+// Parallel halving arith
+// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
+def : InstRW<[A57Write_2cyc_1M], (instregex
+ "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
+ "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;
+
+// Parallel halving arith with exchange
+// SHASX, SHSAX, UHASX, UHSAX
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
+ "(t2)?UHASX", "(t2)?UHSAX")>;
+
+// Parallel saturating arith
+// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
+def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
+ "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;
+
+// Parallel saturating arith with exchange
+// QASX, QSAX, UQASX, UQSAX
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
+ "(t2)?UQASX", "(t2)?UQSAX")>;
+
+// Saturate: SSAT, SSAT16, USAT, USAT16
+def : InstRW<[A57Write_2cyc_1M],
+ (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;
+
+// Saturating arith: QADD, QSUB
+// Anchored with $ so the (16|8) parallel forms above are not re-matched.
+def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;
+
+// Saturating doubling arith: QDADD, QDSUB
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
+
+// --- 3.7 Miscellaneous Data-Processing Instructions ---
+// Bit field extract: SBFX, UBFX
+def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;
+
+// Bit field insert/clear: BFI, BFC
+def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;
+
+// Select bytes, conditional/unconditional
+// Predicated SEL costs one extra cycle.
+def A57WriteSEL : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;
+
+// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
+def : InstRW<[A57Write_1cyc_1I],
+ (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;
+
+// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
+def : InstRW<[A57Write_2cyc_1M],
+ (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;
+
+// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
+def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;
+
+// Sum of absolute differences: USAD8, USADA8
+def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
+
+// --- 3.8 Load Instructions ---
+
+// Load, immed offset
+// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
+def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
+ "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
+ "PICLDR", "tLDR")>;
+
+def : InstRW<[A57Write_4cyc_1L],
+ (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
+
+// For "Load, register offset, minus" we need +1cyc, +1I
+// (the negated register offset is computed on the I pipe first).
+def A57WriteLdrAm3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
+// X2 predicate variant: same check on a different operand index (LDRD
+// carries two destination registers before the address operands).
+def A57WriteLdrAm3X2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+// LDRD defines two registers, so the write is listed twice.
+def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
+def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
+
+// Register-offset LDR/LDRB: extra cycle and I-unit when the offset is
+// negative or scaled by anything other than the optimal LSL #2.
+def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
+
+// Zero-μop pseudo-writes used to model the latency of the base-register
+// writeback result without consuming any issue resources.
+def A57WrBackOne : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def A57WrBackTwo : SchedWriteRes<[]> {
+ let Latency = 2;
+ let NumMicroOps = 0;
+}
+def A57WrBackThree : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+
+// --- LDR pre-indexed ---
+// Operand order for loads: load-result write(s) first, base-writeback last,
+// matching the instructions' def-operand order.
+// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
+ "LDRB_PRE_IMM", "t2LDRB_PRE")>;
+
+// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
+// (5 cyc load result for not-lsl2 scaled)
+def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
+ (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;
+
+// Writeback takes 2 cycles when the offset is a register, 1 otherwise.
+def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
+ (instregex "LDR(H|SH|SB)_PRE")>;
+def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
+ (instregex "t2LDR(H|SH|SB)?_PRE")>;
+
+// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
+def A57WriteLdrDAm3Pre : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
+ (instregex "LDRD_PRE")>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
+ (instregex "t2LDRD_PRE")>;
+
+// --- LDR post-indexed ---
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
+ "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;
+
+def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
+ (instregex "LDR(H|SH|SB)_POST")>;
+def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
+ (instregex "t2LDR(H|SH|SB)?_POST")>;
+
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
+ "LDRB_POST_REG", "LDR(B?)T_POST$")>;
+
+// Scaled register post-index needs the M pipe too and a 3-cycle writeback.
+def A57WriteLdrTRegPost : SchedWriteVariant<[
+ SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
+ SchedVar<NoSchedPred, [A57WrBackTwo]>
+]>;
+// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
+def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
+ (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;
+
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
+
+def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
+ (instregex "t2LDRD_POST")>;
+
+// --- Preload instructions ---
+// Preload, immed offset
+def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
+ "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;
+
+// Preload, register offset,
+// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
+// otherwise 4cyc "L"
+def A57WritePLD : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
+ SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
+
+// --- Load multiple instructions ---
+// A57 loads two registers per cycle, so the predicate buckets by
+// ceil(number-of-defined-regs / 2); the Writes lists below are paired
+// accordingly (two writes per latency step).
+foreach NumAddr = 1-8 in {
+ def A57LMAddrPred#NumAddr :
+ SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
+}
+
+// LDM where the base register is NOT in the register list.
+def A57LDMOpsListNoregin : A57WriteLMOpsListType<
+ [A57Write_3cyc_1L, A57Write_3cyc_1L,
+ A57Write_4cyc_1L, A57Write_4cyc_1L,
+ A57Write_5cyc_1L, A57Write_5cyc_1L,
+ A57Write_6cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_7cyc_1L,
+ A57Write_8cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_9cyc_1L,
+ A57Write_10cyc_1L, A57Write_10cyc_1L]>;
+def A57WriteLDMnoreginlist : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]>
+]> { let Variadic=1; }
+
+// LDM where the base register IS in the register list: +1 cycle and an
+// extra I-unit per pair.
+def A57LDMOpsListRegin : A57WriteLMOpsListType<
+ [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
+def A57WriteLDMreginlist : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]>
+]> { let Variadic=1; }
+
+// Writeback LDM: first write is the 1-cycle base update, then the pairs.
+def A57LDMOpsList_Upd : A57WriteLMOpsListType<
+ [A57WrBackOne,
+ A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
+ A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
+def A57WriteLDM_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
+ SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
+]> { let Variadic=1; }
+
+def A57WriteLDM : SchedWriteVariant<[
+ SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
+ SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
+
+// TODO: no writeback latency defined in documentation (implemented as 1 cyc)
+def : InstRW<[A57WriteLDM_Upd],
+ (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
+
+// --- 3.9 Store Instructions ---
+// Note: for stores with writeback the writeback write is listed FIRST
+// (the updated base is the instruction's only def operand), the reverse of
+// the load ordering above.
+
+// Store, immed offset
+def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
+ "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;
+
+// Store, register offset
+// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S",
+// otherwise 1cyc S.
+def A57WriteStrAmLDSTSO : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;
+
+// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
+def A57WriteStrAm3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;
+def A57WriteStrAm3X2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
+
+// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM",
+ "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
+ "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;
+
+// Store, register pre-indexed:
+// 1(1) "S, I0/I1" for plus reg
+// 3(2) "I0/I1, S" for minus reg
+// 1(2) "S, M" for scaled plus lsl2
+// 3(2) "I0/I1, S" for other scaled
+def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>,
+ SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
+ (instregex "STR_PRE_REG", "STRB_PRE_REG")>;
+
+// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
+// 1(1) "S, I0/I1" for imm or reg plus
+// 3(2) "I0/I1, S" for reg minus
+def A57WriteStrAm3PreX2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
+ (instregex "STRH_PRE")>;
+
+def A57WriteStrAm3PreX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
+ (instregex "STRD_PRE")>;
+
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM",
+ "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>;
+
+// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
+def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG",
+ "STRB(T?)_POST_REG", "STR(B?)T_POST$")>;
+
+// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
+// 1(1) "S, I0/I1" both for reg or imm
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
+ (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>;
+
+// --- Store multiple instructions ---
+// TODO: no writeback latency defined in documentation
+// One S-unit cycle per pair of stored registers (same ceil(n/2) buckets
+// as the LDM predicates above).
+def A57WriteSTM : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
+]>;
+def A57WriteSTM_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+
+def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
+def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
+ (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;
+
+// --- 3.10 FP Data Processing Instructions ---
+def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;
+
+// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
+def A57WriteVcmp : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
+ SchedVar<NoSchedPred, [A57Write_3cyc_1X]>
+]>;
+def : InstRW<[A57WriteVcmp],
+ (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;
+
+// fp convert
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
+
+def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
+
+// FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;
+
+// FP divide, FP square root
+def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>;
+def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>;
+def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
+def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
+
+// FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
+
+// FP multiply-accumulate pipelines support late forwarding of the result
+// from FP multiply μops to the accumulate operands of an
+// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
+// after the FP multiply μop has been issued
+// FP multiply, FZ
+def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+
+def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
+def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
+def : ReadAdvance<ReadFPMUL, 0>;
+
+// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
+// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
+def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+
+// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
+// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
+// Currently, there is no way to define different read advances for VFMA operand
+// from VFMA or from VMUL, so there will be 5 read advance.
+// Zero latency (instead of one) for VMUL->VFMA shouldn't break something.
+// The same situation with ASIMD VMUL/VFMA instructions
+// def A57ReadVFMA : SchedRead;
+// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
+// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
+def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;
+
+def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
+def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
+def : SchedAlias<ReadFPMAC, A57ReadVFMA5>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
+
+// --- 3.11 FP Miscellaneous Instructions ---
+// VMOV: 3cyc "F0/F1" for imm/reg
+def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
+
+// 5cyc L for FP transfer, vfp to core reg,
+// 5cyc L for FP transfer, core reg to vfp
+def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
+// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2).
+def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>;
+
+// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
+// NOTE(review): the comment names F0/F1 but the write resource is
+// A57Write_8cyc_1L_1I (I-unit, not V) — confirm against the A57 optimization
+// guide whether the resource or the comment is the intended one.
+def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>;
+
+// --- 3.12 FP Load Instructions ---
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>;
+
+// FP load multiple (VLDM)
+// Unconditional VLDM loads two registers per cycle (paired latencies);
+// conditional VLDM degrades to one register per cycle (strictly increasing
+// latencies below).
+
+def A57VLDMOpsListUncond : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L, A57Write_5cyc_1L,
+ A57Write_6cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_7cyc_1L,
+ A57Write_8cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_9cyc_1L,
+ A57Write_10cyc_1L, A57Write_10cyc_1L,
+ A57Write_11cyc_1L, A57Write_11cyc_1L,
+ A57Write_12cyc_1L, A57Write_12cyc_1L]>;
+def A57WriteVLDMuncond : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57VLDMOpsListCond : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_10cyc_1L,
+ A57Write_11cyc_1L, A57Write_12cyc_1L,
+ A57Write_13cyc_1L, A57Write_14cyc_1L,
+ A57Write_15cyc_1L, A57Write_16cyc_1L,
+ A57Write_17cyc_1L, A57Write_18cyc_1L,
+ A57Write_19cyc_1L, A57Write_20cyc_1L]>;
+def A57WriteVLDMcond : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57WriteVLDM : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
+ SchedVar<NoSchedPred, [A57WriteVLDMuncond]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>;
+
+// Writeback forms: same latency tables with an extra I-unit per write.
+def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
+ A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;
+def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
+ A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
+ A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
+ A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
+ A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;
+def A57WriteVLDMcond_UPD : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57WriteVLDM_UPD : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
+ SchedVar<NoSchedPred, [A57WriteVLDMuncond_UPD]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
+ (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
+
+// --- 3.13 FP Store Instructions ---
+def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>;
+
+def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>;
+
+// VSTM of S-registers: one S-unit cycle per pair of registers.
+def A57WriteVSTMs : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
+]>;
+// VSTM of D-registers: twice the data per pair, hence doubled cycle counts.
+def A57WriteVSTMd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1S]>
+]>;
+// Writeback forms add an I-unit for the base update.
+def A57WriteVSTMs_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+def A57WriteVSTMd_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+
+def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>;
+def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>;
+def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
+ (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
+def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
+ (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
+
+// --- 3.14 ASIMD Integer Instructions ---
+
+// ASIMD absolute diff, 3cyc F0/F1 for integer VABD
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>;
+
+// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
+// "N(M)" notation: N cycles normally, M cycles into a dependent accumulate
+// (modelled via the SchedReadAdvance on the accumulator operand).
+def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
+def : InstRW<[A57WriteVABAD, A57ReadVABAD],
+ (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;
+def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
+def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
+ (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;
+
+// ASIMD absolute diff accum long: 4(1) F1 for VABAL
+def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
+def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>;
+
+// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>;
+
+// ASIMD arith, basic
+def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW",
+ "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
+ "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>;
+
+// ASIMD arith, complex
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB",
+ "VQABS", "VQADD", "VQNEG", "VQSUB",
+ "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;
+
+// ASIMD compare
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;
+
+// ASIMD logical
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;
+
+// ASIMD max/min
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
+
+// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
+// and multiply-with-accumulate instructions relative to r0pX.
+def A57WriteVMULD_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULD_VecInt], (instregex
+ "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)",
+ "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>;
+
+// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
+def A57WriteVMULQ_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def : InstRW<[A57WriteVMULQ_VecInt], (instregex
+ "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)",
+ "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate, D-form
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+// The read advance is revision-dependent so that latency-minus-advance stays
+// at the documented 1-cycle accumulate chain on both revisions.
+def A57WriteVMLAD_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAD_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
+ (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>;
+
+// ASIMD multiply accumulate, Q-form
+// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def A57ReadVMLAQ_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
+ (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAL_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
+ (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
+
+// ASIMD multiply accumulate saturating long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (3 or 2 ReadAdvance)
+def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
+ (instregex "VQDMLAL", "VQDMLSL")>;
+
+// ASIMD multiply long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+def A57WriteVMULL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULL_VecInt],
+ (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
+
+// ASIMD pairwise add and accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
+def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>;
+
+// ASIMD shift accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
+def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[A57Write_3cyc_1X],
+ (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)",
+ "VRSHRN")>;
+
+// ASIMD shift by immed and insert, basic, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by immed and insert, basic, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+ "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, basic, D-form
+def : InstRW<[A57Write_3cyc_1X], (instregex
+ "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+// VQRSHL, VQSHL, VRSHL
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
+ "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+ "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
+ "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// --- 3.15 ASIMD Floating-Point Instructions ---
+// ASIMD FP absolute value
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>;
+
+// ASIMD FP arith
+def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)",
+ "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>;
+
+// ASIMD FP compare
+def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)",
+ "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;
+
+// ASIMD FP convert, integer
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
+ "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
+ "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;
+
+// ASIMD FP convert, half-precision: 8cyc F0/F1
+def : InstRW<[A57Write_8cyc_1V], (instregex
+ "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
+ "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
+ "VCVT(f2h|h2f)")>;
+
+// ASIMD FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+
+// ASIMD FP multiply
+def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;
+
+// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
+def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadVMLA_VecFP :
+ SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
+def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
+ (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>;
+
+// ASIMD FP negate
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>;
+
+// ASIMD FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
+
+// --- 3.16 ASIMD Miscellaneous Instructions ---
+
+// ASIMD bitwise insert
+def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
+
+// ASIMD count
+def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
+
+// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>;
+
+// ASIMD duplicate, scalar: 3cyc "F0/F1"
+def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>;
+
+// ASIMD extract
+def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>;
+
+// ASIMD move, immed
+def : InstRW<[A57Write_3cyc_1V], (instregex
+ "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
+ "VMOVQ0")>;
+
+// ASIMD move, narrowing
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>;
+
+// ASIMD reciprocal estimate
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>;
+
+// ASIMD reciprocal step, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>;
+
+// ASIMD reverse, swap, table lookup (1-2 reg)
+def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;
+
+// ASIMD table lookup (3-4 reg)
+def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>;
+
+// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
+def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>;
+
+// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>;
+
+// ASIMD transpose
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>;
+
+// ASIMD unzip/zip, D-form
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
+ (instregex "VUZPd", "VZIPd")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
+ (instregex "VUZPq", "VZIPq")>;
+
+// --- 3.17 ASIMD Load Instructions ---
+
+// Overridden via InstRW for this processor.
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
+ (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+
+// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_6cyc_1L],
+ (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>;
+
+def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
+ (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;
+
+// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex
+ "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex
+ "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V],
+ (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+
+// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2b(8|16|32)wb")>;
+
+// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+ "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+// 2 results + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
+ (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
+ "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
+// 3 results
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+ (instregex "VLD3(d|q)(8|16|32)$")>;
+// 1 result
+def : InstRW<[A57Write_9cyc_1L_1V],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+// 3 results + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD3LN(d|q)32$",
+ "VLD3LN(d|q)32Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)32_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)32Pseudo_UPD")>;
+
+// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+ (instregex "VLD3LN(d|q)(8|16)$",
+ "VLD3LN(d|q)(8|16)Pseudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)(8|16)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD3DUP(d|q)(8|16|32)$",
+ "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
+ A57Write_9cyc_1L_1V],
+ (instregex "VLD4(d|q)(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
+ A57Write_8cyc_1L_1V],
+ (instregex "VLD4LN(d|q)32$",
+ "VLD4LN(d|q)32Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4LN(d|q)32_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4LN(d|q)32Pseudo_UPD")>;
+
+// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
+ A57Write_9cyc_1L_1V],
+ (instregex "VLD4LN(d|q)(8|16)$",
+ "VLD4LN(d|q)(8|16)Pseudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4LN(d|q)(8|16)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;
+
+// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
+ A57Write_8cyc_1L_1V],
+ (instregex "VLD4DUP(d|q)(8|16|32)$",
+ "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;
+
+// --- 3.18 ASIMD Store Instructions ---
+
+// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
+def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>;
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)wb")>;
+// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
+def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>;
+def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
+ (instregex "VST1q(8|16|32|64)wb")>;
+// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
+def : InstRW<[A57Write_3cyc_1S],
+ (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
+// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
+def : InstRW<[A57Write_4cyc_1S],
+ (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
+// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST2(d|b)(8|16|32)$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST2(b|d)(8|16|32)wb")>;
+// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
+def : InstRW<[A57Write_4cyc_1S_1V],
+ (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
+ (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
+// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST2LN(d|q)(8|16|32)_UPD",
+ "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 3 element, multiple, 3 reg
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST3(d|q)(8|16|32)_UPD",
+ "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+// ASIMD store, 3 element, one lane
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST3LN(d|q)(8|16|32)_UPD",
+ "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 4 element, multiple, 4 reg
+def : InstRW<[A57Write_4cyc_1S_1V],
+ (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
+ (instregex "VST4(d|q)(8|16|32)_UPD",
+ "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+// ASIMD store, 4 element, one lane
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST4LN(d|q)(8|16|32)_UPD",
+ "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// --- 3.19 Cryptography Extensions ---
+// Crypto AES ops
+// AESD, AESE, AESIMC, AESMC: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>;
+// Crypto SHA1 xor ops: 6cyc F0/F1
+def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
+// Crypto SHA1 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+// Crypto SHA1 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+// Crypto SHA256 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+// Crypto SHA256 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+
+// --- 3.20 CRC ---
+def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
+
+// -----------------------------------------------------------------------------
+// Common definitions
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+def : SchedAlias<WriteALU, A57Write_1cyc_1I>;
+
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
+def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
+def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;
+
+def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : ReadAdvance<ReadALU, 0>;
+
+} // SchedModel = CortexA57Model
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
new file mode 100644
index 0000000..670717d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -0,0 +1,323 @@
+//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued as follows: one to I pipe, six to S pipes and
+// four to V pipes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
+ let ResourceCycles = [17]; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
+ let ResourceCycles = [18]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+ let ResourceCycles = [19]; }
+def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; }
+def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
+def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+ let ResourceCycles = [35]; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+
+// A57Write_3cyc_1L - A57Write_20cyc_1L
+foreach Lat = 3-20 in {
+ def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> {
+ let Latency = Lat;
+ }
+}
+
+// A57Write_4cyc_1S - A57Write_16cyc_1S
+foreach Lat = 4-16 in {
+ def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> {
+ let Latency = Lat;
+ }
+}
+
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; }
+def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; }
+def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; }
+def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+ let ResourceCycles = [32, 32];
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS,
+ A57UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+// One B-pipe plus one L-pipe micro-op, per the unit mix encoded in the name.
+def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB, A57UnitL]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 36;
+ let NumMicroOps = 2;
+ let ResourceCycles = [18, 18];
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I
+foreach Lat = 3-20 in {
+ def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> {
+ let Latency = Lat; let NumMicroOps = 2;
+ }
+}
+
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I
+foreach Lat = 4-16 in {
+ def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> {
+ let Latency = Lat; let NumMicroOps = 2;
+ }
+}
+
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
index 519e595..4e72b13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -1944,6 +1944,16 @@ def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
let NumMicroOps = 0; }
+def : SchedAlias<WriteMUL16, A9WriteM16>;
+def : SchedAlias<WriteMUL32, A9WriteM>;
+def : SchedAlias<WriteMUL64Lo, A9WriteM>;
+def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
+def : SchedAlias<WriteMAC16, A9WriteM16>;
+def : SchedAlias<WriteMAC32, A9WriteM>;
+def : SchedAlias<WriteMAC64Lo, A9WriteM>;
+def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
@@ -1953,6 +1963,7 @@ def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+
def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
@@ -1970,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
// Reserve A9UnitFP for 2 consecutive cycles.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
let Latency = 4;
@@ -1992,6 +2012,7 @@ def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
// Load Integer.
def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+def : SchedAlias<WriteLd, A9WriteL>;
// Load the upper 32-bits using the same micro-op.
def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
let NumMicroOps = 0; }
@@ -2471,6 +2492,34 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
+def : SchedAlias<WriteST, A9WriteS>;
+
+// ===---------------------------------------------------------------------===//
+// Floating-point. Map target defined SchedReadWrite to processor specific ones
+//
+def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def : SchedAlias<WriteFPMOV, A9WriteFMov>;
+
+def : SchedAlias<WriteFPALU32, A9WriteF>;
+def : SchedAlias<WriteFPALU64, A9WriteF>;
+
+def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
+def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
+
+def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
+def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
+
+def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
+def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
+def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
+def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
+
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 0>;
+
+// ===---------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
+//
def : InstRW< [WriteALU],
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
"BICrr")>;
@@ -2518,12 +2567,11 @@ def : InstRW<[A9WriteLb],
"LDRH", "LDRSH", "LDRSB")>;
def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
-def : WriteRes<WriteDiv, []> { let Latency = 0; }
+def : WriteRes<WriteDIV, []> { let Latency = 0; }
def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
def : WriteRes<WritePreLd, []>;
-def : SchedAlias<WriteCvtFP, A9WriteF>;
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleM3.td b/contrib/llvm/lib/Target/ARM/ARMScheduleM3.td
new file mode 100644
index 0000000..93f8299
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleM3.td
@@ -0,0 +1,21 @@
+//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-M3 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM3Model : SchedMachineModel {
+ let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
+ let MicroOpBufferSize = 0; // In-order
+ let LoadLatency = 2; // Latency when not pipelined, not pc-relative
+ let MispredictPenalty = 2; // Best case branch taken cost
+
+ let CompleteModel = 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
index 1b40742..782be9b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -70,15 +70,13 @@ def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
+// Multiply - aliased to sub-target specific later
+
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
-def : WriteRes<WriteDiv, [R52UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8]; // not pipelined
+def : WriteRes<WriteDIV, [R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // non-pipelined
}
-// Loads
-def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
-def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
-
// Branches - LR written in Late EX2
def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
@@ -86,11 +84,50 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
// Misc
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
+// Integer pipeline by-passes
def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
+
+// Floating-point. Map target-defined SchedReadWrites to subtarget
+def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
+
+def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
+ let Latency = 6;
+}
+
+def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
+ let Latency = 11; // as it is internally two insns (MUL then ADD)
+}
+
+def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
+ R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 11;
+}
+
+def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
+ let Latency = 7; // FP div takes fixed #cycles
+ let ResourceCycles = [7]; // is not pipelined
+}
+
+def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+
+def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
+def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
+// Overridden via InstRW for this processor.
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
+def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.
@@ -106,6 +143,9 @@ def : ReadAdvance<R52Read_F2, 2>;
// Cortex-R52 specific SchedWrites for use with InstRW
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
+def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
+ let Latency = 4; let NumMicroOps = 0;
+}
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
let Latency = 8; let ResourceCycles = [8]; // not pipelined
}
@@ -120,6 +160,19 @@ def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
+// Alias generics to sub-target specific
+def : SchedAlias<WriteMUL16, R52WriteMAC>;
+def : SchedAlias<WriteMUL32, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
+def : SchedAlias<WriteMAC16, R52WriteMAC>;
+def : SchedAlias<WriteMAC32, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
+def : SchedAlias<WritePreLd, R52WriteLd>;
+def : SchedAlias<WriteLd, R52WriteLd>;
+def : SchedAlias<WriteST, R52WriteST>;
+
def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
let Latency = 4;
@@ -147,19 +200,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
-def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 7; // FP div takes fixed #cycles
- let ResourceCycles = [7]; // is not pipelined
- }
-def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 17;
- let ResourceCycles = [17];
-}
-
-
//===----------------------------------------------------------------------===//
-// Subtarget-specific - map operands to SchedReadWrites
+// Floating-point. Map target defined SchedReadWrites to processor specific ones
+//
+def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
+def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
+//===----------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
+//
def : InstRW<[WriteALU], (instrs COPY)>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
@@ -235,7 +286,7 @@ def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
"t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
- (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+ (instregex "t2SDIV", "t2UDIV")>;
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform
@@ -294,15 +345,6 @@ def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
-
-//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
-//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
-//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
-
-
// Integer Load, Multiple.
foreach Lat = 3-25 in {
def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
@@ -492,12 +534,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VAC
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
-def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
-def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
-
-def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
- (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
-
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
@@ -682,21 +718,24 @@ def R52WriteSTM : SchedWriteVariant<[
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.
-def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
-def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
+def : WriteRes<WriteVLD2, [R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2];
+ let SingleIssue = 1;
}
-def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD3, [R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [3];
+ let SingleIssue = 1;
}
-def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD4, [R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
let ResourceCycles = [4];
+ let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 5;
@@ -777,9 +816,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHS
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
-def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
-def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
@@ -797,95 +835,6 @@ def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VR
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
//---
-// VLDx. Vector Loads
-//---
-// 1-element structure load
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
-
-// 2-element structure load
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
-
-// 3-element structure load
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
-
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
-
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
-
-// 4-element structure load
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
-
-
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
-
-//---
// VSTx. Vector Stores
//---
// 1-element structure store
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
index ea2bf4b..b838688 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -133,6 +133,8 @@ let SchedModel = SwiftModel in {
def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+ def : SchedAlias<WriteLd, SwiftWriteP2ThreeCycle>;
+ def : SchedAlias<WriteST, SwiftWriteP2>;
def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
@@ -166,10 +168,10 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP01OneCycle2x_load],
(instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
- def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
+ def SwiftWriteP0TwoCycleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCycleTwoUops ]>,
SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]>
]>;
@@ -282,6 +284,18 @@ let SchedModel = SwiftModel in {
let ResourceCycles = [2, 3];
}
+ // Aliasing sub-target specific WriteRes to generic ones
+ def : SchedAlias<WriteMUL16, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteMUL32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteMUL64Lo, SwiftP0P0P01FiveCycle>;
+ def : SchedAlias<WriteMUL64Hi, SwiftWrite5Cycle>;
+ def : SchedAlias<WriteMAC16, SwiftPredP0P01FourFiveCycle>;
+ def : SchedAlias<WriteMAC32, SwiftPredP0P01FourFiveCycle>;
+ def : SchedAlias<WriteMAC64Lo, SwiftWrite5Cycle>;
+ def : SchedAlias<WriteMAC64Hi, Swift2P03P01FiveCycle>;
+ def : ReadAdvance<ReadMUL, 0>;
+ def : SchedAlias<ReadMAC, SwiftReadAdvanceFourCyclesPred>;
+
// 4.2.15 Integer Multiply Accumulate, Long
// 4.2.16 Integer Multiply Accumulate, Dual
// 4.2.17 Integer Multiply Accumulate Accumulate, Long
@@ -300,7 +314,7 @@ let SchedModel = SwiftModel in {
let ResourceCycles = [1, 14];
}
// 4.2.18 Integer Divide
- def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+ def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround.
def : InstRW <[SwiftDiv],
(instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
@@ -310,7 +324,7 @@ let SchedModel = SwiftModel in {
let Latency = 3;
let NumMicroOps = 2;
}
- def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ def SwiftWriteP2P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 4;
let NumMicroOps = 2;
}
@@ -343,7 +357,7 @@ let SchedModel = SwiftModel in {
"tLDR(r|i|spi|pci|pciASM)")>;
def : InstRW<[SwiftWriteP2ThreeCycle],
(instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
- def : InstRW<[SwiftWriteP2P01FourCyle],
+ def : InstRW<[SwiftWriteP2P01FourCycle],
(instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
"t2LDRpci_pic", "tLDRS(B|H)")>;
def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
@@ -597,8 +611,6 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
"VMULL", "VQDMULL")>;
- def : InstRW<[SwiftWriteP1SixCycle],
- (instregex "VMULD", "VNMULD")>;
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
"VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
@@ -607,8 +619,6 @@ let SchedModel = SwiftModel in {
// 4.2.36 Advanced SIMD and VFP, Convert
def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
- // Fixpoint conversions.
- def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
// 4.2.37 Advanced SIMD and VFP, Move
def : InstRW<[SwiftWriteP0TwoCycle],
@@ -1036,6 +1046,40 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+ // ===---------------------------------------------------------------------===//
+ // Floating-point. Map target defined SchedReadWrite to processor specific ones
+ //
+ def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>;
+
+ def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>;
+
+ def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>;
+
+ def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>;
+
+ def : SchedAlias<WriteFPDIV32, SwiftDiv17>;
+ def : SchedAlias<WriteFPSQRT32, SwiftDiv17>;
+
+ def : SchedAlias<WriteFPDIV64, SwiftDiv32>;
+ def : SchedAlias<WriteFPSQRT64, SwiftDiv32>;
+
+ def : ReadAdvance<ReadFPMUL, 0>;
+ def : ReadAdvance<ReadFPMAC, 0>;
+
+ // Overridden via InstRW for this processor.
+ def : WriteRes<WriteVLD1, []>;
+ def : WriteRes<WriteVLD2, []>;
+ def : WriteRes<WriteVLD3, []>;
+ def : WriteRes<WriteVLD4, []>;
+ def : WriteRes<WriteVST1, []>;
+ def : WriteRes<WriteVST2, []>;
+ def : WriteRes<WriteVST3, []>;
+ def : WriteRes<WriteVST4, []>;
+
// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 3b99762..33dcf9b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -95,7 +95,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.isSExt = false;
+ Entry.IsSExt = false;
Args.push_back(Entry);
} else {
Entry.Node = Src;
@@ -114,11 +114,11 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(
- TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
- TLI->getPointerTy(DAG.getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -198,17 +198,18 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
return Chain;
// Issue loads / stores for the trailing (1 - 3) bytes.
+ auto getRemainingValueType = [](unsigned BytesLeft) {
+ return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
+ };
+ auto getRemainingSize = [](unsigned BytesLeft) {
+ return (BytesLeft >= 2) ? 2 : 1;
+ };
+
unsigned BytesLeftSave = BytesLeft;
i = 0;
while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
+ VT = getRemainingValueType(BytesLeft);
+ VTSize = getRemainingSize(BytesLeft);
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, dl, MVT::i32)),
@@ -224,14 +225,8 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
i = 0;
BytesLeft = BytesLeftSave;
while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
+ VT = getRemainingValueType(BytesLeft);
+ VTSize = getRemainingSize(BytesLeft);
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, dl, MVT::i32)),
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index e2df0bd..2c42a13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -11,27 +11,43 @@
//
//===----------------------------------------------------------------------===//
+#include "ARM.h"
+
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "ARMCallLowering.h"
+#include "ARMLegalizerInfo.h"
+#include "ARMRegisterBankInfo.h"
+#endif
#include "ARMSubtarget.h"
#include "ARMFrameLowering.h"
-#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Attributes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#endif
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -76,11 +92,6 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-/// EnableExecuteOnly - Enables the generation of execute-only code on supported
-/// targets
-static cl::opt<bool>
-EnableExecuteOnly("arm-execute-only");
-
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
StringRef FS) {
ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
@@ -90,13 +101,41 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
return new ARMFrameLowering(STI);
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct ARMGISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+
+} // end anonymous namespace
+#endif
+
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
- FrameLowering(initializeFrameLowering(CPU, FS)),
+ CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
+ TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -104,7 +143,29 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this), GISel() {}
+ TLInfo(TM, *this) {
+ assert((isThumb() || hasARMOps()) &&
+ "Target must either be thumb or support ARM operations!");
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
+ GISel->Legalizer.reset(new ARMLegalizerInfo(*this));
+
+ auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ GISel->InstSelector.reset(createARMInstructionSelector(
+ *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI));
+
+ GISel->RegBankInfo.reset(RBI);
+#endif
+ setGISelAccessor(*GISel);
+}
const CallLowering *ARMSubtarget::getCallLowering() const {
assert(GISel && "Access to GlobalISel APIs not set");
@@ -148,11 +209,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isTargetDarwin()) {
StringRef ArchName = TargetTriple.getArchName();
- unsigned ArchKind = llvm::ARM::parseArch(ArchName);
- if (ArchKind == llvm::ARM::AK_ARMV7S)
+ unsigned ArchKind = ARM::parseArch(ArchName);
+ if (ArchKind == ARM::AK_ARMV7S)
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
- else if (ArchKind == llvm::ARM::AK_ARMV7K)
+ else if (ArchKind == ARM::AK_ARMV7K)
// Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
// ARMv7k does not use SjLj exception handling.
CPUString = "cortex-a7";
@@ -200,12 +261,12 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// support in the assembler and linker to be used. This would need to be
// fixed to fully support tail calls in Thumb1.
//
- // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
- // LR. This means if we need to reload LR, it takes an extra instructions,
- // which outweighs the value of the tail call; but here we don't know yet
- // whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
- // emitEpilogue if LR is used.
+ // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M
+ // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This
+ // means if we need to reload LR, it takes extra instructions, which outweighs
+ // the value of the tail call; but here we don't know yet whether LR is going
+ // to be used. We generate the tail call here and turn it back into CALL/RET
+ // in emitEpilogue if LR is used.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
@@ -274,6 +335,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexM3:
case ExynosM1:
case CortexR52:
+ case Kryo:
break;
case Krait:
PreISelOperandLatencyAdjustment = 1;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 8c8218d..e15b175 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -14,47 +14,93 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
-
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
-#include "ARMInstrInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "Thumb1FrameLowering.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <memory>
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "ARMGenSubtargetInfo.inc"
namespace llvm {
+
+class ARMBaseTargetMachine;
class GlobalValue;
class StringRef;
-class TargetOptions;
-class ARMBaseTargetMachine;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3,
- CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73,
- Krait, Swift, ExynosM1
+ Others,
+
+ CortexA12,
+ CortexA15,
+ CortexA17,
+ CortexA32,
+ CortexA35,
+ CortexA5,
+ CortexA53,
+ CortexA57,
+ CortexA7,
+ CortexA72,
+ CortexA73,
+ CortexA8,
+ CortexA9,
+ CortexM3,
+ CortexR4,
+ CortexR4F,
+ CortexR5,
+ CortexR52,
+ CortexR7,
+ ExynosM1,
+ Krait,
+ Kryo,
+ Swift
};
enum ARMProcClassEnum {
- None, AClass, RClass, MClass
+ None,
+
+ AClass,
+ MClass,
+ RClass
};
enum ARMArchEnum {
- ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
- ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
- ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline,
+ ARMv2,
+ ARMv2a,
+ ARMv3,
+ ARMv3m,
+ ARMv4,
+ ARMv4t,
+ ARMv5,
+ ARMv5t,
+ ARMv5te,
+ ARMv5tej,
+ ARMv6,
+ ARMv6k,
+ ARMv6kz,
+ ARMv6m,
+ ARMv6sm,
+ ARMv6t2,
+ ARMv7a,
+ ARMv7em,
+ ARMv7m,
+ ARMv7r,
+ ARMv7ve,
+ ARMv81a,
+ ARMv82a,
+ ARMv8a,
+ ARMv8mBaseline,
+ ARMv8mMainline,
ARMv8r
};
@@ -162,16 +208,12 @@ protected:
/// FP registers for VFPv3.
bool HasD16 = false;
- /// HasHardwareDivide - True if subtarget supports [su]div
- bool HasHardwareDivide = false;
+ /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
+ bool HasHardwareDivideInThumb = false;
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
bool HasHardwareDivideInARM = false;
- /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
- /// instructions.
- bool HasT2ExtractPack = false;
-
/// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
/// instructions.
bool HasDataBarrier = false;
@@ -192,6 +234,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate = false;
+ /// CheapPredicableCPSRDef - If true, disable +1 predication cost
+ /// for instructions updating CPSR. Enabled for Cortex-A57.
+ bool CheapPredicableCPSRDef = false;
+
/// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
/// movs with shifter operand (i.e. asr, lsl, lsr).
bool AvoidMOVsShifterOperand = false;
@@ -200,6 +246,11 @@ protected:
/// avoid issue "normal" call instructions to callees which do not return.
bool HasRetAddrStack = false;
+ /// HasBranchPredictor - True if the subtarget has a branch predictor. Having
+ /// a branch predictor or not changes the expected cost of taking a branch
+ /// which affects the choice of whether to use predicated instructions.
+ bool HasBranchPredictor = true;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension = false;
@@ -239,6 +290,10 @@ protected:
/// HasFPAO - if true, processor does positive address offset computation faster
bool HasFPAO = false;
+ /// HasFuseAES - if true, processor executes back to back AES instruction
+ /// pairs faster.
+ bool HasFuseAES = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -310,6 +365,10 @@ protected:
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
+ /// Implicitly convert an instruction to a different one if its immediates
+ /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
+ bool NegativeImmediates = true;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment = 4;
@@ -362,6 +421,7 @@ public:
unsigned getMaxInlineSizeThreshold() const {
return 64;
}
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -373,15 +433,19 @@ public:
const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+
const ARMBaseInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
}
+
const ARMTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+
const ARMFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
}
+
const ARMBaseRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
@@ -451,19 +515,21 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
bool hasVirtualization() const { return HasVirtualization; }
+
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
}
- bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
- bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
bool hasAcquireRelease() const { return HasAcquireRelease; }
+
bool hasAnyDataBarrier() const {
return HasDataBarrier || (hasV6Ops() && !isThumb());
}
+
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
@@ -490,8 +556,10 @@ public:
bool nonpipelinedVFP() const { return NonpipelinedVFP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRetAddrStack() const { return HasRetAddrStack; }
+ bool hasBranchPredictor() const { return HasBranchPredictor; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
@@ -503,6 +571,10 @@ public:
bool hasD16() const { return HasD16; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFuseAES() const { return HasFuseAES; }
+ /// \brief Return true if the CPU supports any kind of instruction fusion.
+ bool hasFusion() const { return hasFuseAES(); }
+
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
@@ -561,9 +633,10 @@ public:
TargetTriple.getEnvironment() == Triple::EABIHF ||
isTargetWindows() || isAAPCS16_ABI();
}
+
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
- virtual bool isXRaySupported() const override;
+ bool isXRaySupported() const override;
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
@@ -588,6 +661,7 @@ public:
bool useR7AsFramePointer() const {
return isTargetDarwin() || (!isTargetWindows() && isThumb());
}
+
/// Returns true if the frame setup is split into two separate pushes (first
/// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
/// to lr. This is always required on Thumb1-only targets, as the push and
@@ -656,6 +730,7 @@ public:
/// True if fast-isel is used.
bool useFastISel() const;
};
-} // End llvm namespace
-#endif // ARMSUBTARGET_H
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 70c9567..c323a1d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -10,30 +10,47 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetMachine.h"
#include "ARM.h"
-#include "ARMCallLowering.h"
-#include "ARMFrameLowering.h"
-#include "ARMInstructionSelector.h"
-#include "ARMLegalizerInfo.h"
-#include "ARMRegisterBankInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMMacroFusion.h"
+#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool>
@@ -57,91 +74,57 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
+namespace llvm {
+ void initializeARMExecutionDepsFixPass(PassRegistry&);
+}
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
+ RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget());
RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
- RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget());
- RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget());
+ RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget());
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeGlobalISel(Registry);
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
+ initializeARMConstantIslandsPass(Registry);
+ initializeARMExecutionDepsFixPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
if (TT.isOSWindows())
- return make_unique<TargetLoweringObjectFileCOFF>();
- return make_unique<ARMElfTargetObjectFile>();
+ return llvm::make_unique<TargetLoweringObjectFileCOFF>();
+ return llvm::make_unique<ARMElfTargetObjectFile>();
}
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName() == "aapcs16")
+ StringRef ABIName = Options.MCOptions.getABIName();
+
+ if (ABIName.empty())
+ ABIName = ARM::computeDefaultTargetABI(TT, CPU);
+
+ if (ABIName == "aapcs16")
return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- else if (Options.MCOptions.getABIName().startswith("aapcs"))
+ else if (ABIName.startswith("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
- else if (Options.MCOptions.getABIName().startswith("apcs"))
+ else if (ABIName.startswith("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
- assert(Options.MCOptions.getABIName().empty() &&
- "Unknown target-abi option!");
-
- ARMBaseTargetMachine::ARMABI TargetABI =
- ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
-
- unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
- StringRef ArchName = llvm::ARM::getArchName(ArchKind);
- // FIXME: This is duplicated code from the front end and should be unified.
- if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == llvm::Triple::EABI ||
- (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else if (TT.isWatchABI()) {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- } else {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- }
- } else if (TT.isOSWindows()) {
- // FIXME: this is invalid for WindowsCE
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else {
- // Select the default based on the platform.
- switch (TT.getEnvironment()) {
- case llvm::Triple::Android:
- case llvm::Triple::GNUEABI:
- case llvm::Triple::GNUEABIHF:
- case llvm::Triple::MuslEABI:
- case llvm::Triple::MuslEABIHF:
- case llvm::Triple::EABIHF:
- case llvm::Triple::EABI:
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- break;
- case llvm::Triple::GNU:
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- break;
- default:
- if (TT.isOSNetBSD())
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- else
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- break;
- }
- }
-
- return TargetABI;
+ llvm_unreachable("Unhandled/unknown ABI Name!");
+ return ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
auto ABI = computeTargetABI(TT, CPU, Options);
- std::string Ret = "";
+ std::string Ret;
if (isLittle)
// Little endian.
@@ -219,49 +202,38 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM,
OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
- TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
+ TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
// Default to triple-appropriate float ABI
- if (Options.FloatABIType == FloatABI::Default)
- this->Options.FloatABIType =
- Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default) {
+ if (TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
+ TargetTriple.getEnvironment() == Triple::EABIHF ||
+ TargetTriple.isOSWindows() ||
+ TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
+ this->Options.FloatABIType = FloatABI::Hard;
+ else
+ this->Options.FloatABIType = FloatABI::Soft;
+ }
// Default to triple-appropriate EABI
if (Options.EABIVersion == EABI::Default ||
Options.EABIVersion == EABI::Unknown) {
// musl is compatible with glibc with regard to EABI version
- if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI())
+ if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABI ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
+ !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin()))
this->Options.EABIVersion = EABI::GNU;
else
this->Options.EABIVersion = EABI::EABI5;
}
-}
-ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+ initAsmInfo();
+}
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-struct ARMGISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-} // End anonymous namespace.
-#endif
+ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
@@ -294,24 +266,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
-
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
- GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new ARMLegalizerInfo());
-
- auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
-
- // FIXME: At this point, we can't rely on Subtarget having RBI.
- // It's awkward to mix passing RBI and the Subtarget; should we pass
- // TII/TRI as well?
- GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI));
-
- GISel->RegBankInfo.reset(RBI);
-#endif
- I->setGISelAccessor(*GISel);
}
return I.get();
}
@@ -322,22 +276,6 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
});
}
-void ARMTargetMachine::anchor() {}
-
-ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM, CodeGenOpt::Level OL,
- bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
- initAsmInfo();
- if (!Subtarget.hasARMOps())
- report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
- "support ARM mode execution!");
-}
-
-void ARMLETargetMachine::anchor() {}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -345,9 +283,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
-void ARMBETargetMachine::anchor() {}
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -355,51 +291,40 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void ThumbTargetMachine::anchor() {}
-
-ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
- initAsmInfo();
-}
-
-void ThumbLETargetMachine::anchor() {}
-
-ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
-void ThumbBETargetMachine::anchor() {}
-
-ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
+
/// ARM Code Generator Pass Configuration Options.
class ARMPassConfig : public TargetPassConfig {
public:
- ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
ARMBaseTargetMachine &getARMTargetMachine() const {
return getTM<ARMBaseTargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ // add DAG Mutations here.
+ const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+ if (ST.hasFusion())
+ DAG->addMutation(createARMMacroFusionDAGMutation());
+ return DAG;
+ }
+
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ // add DAG Mutations here.
+ const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+ if (ST.hasFusion())
+ DAG->addMutation(createARMMacroFusionDAGMutation());
+ return DAG;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
@@ -413,17 +338,31 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+class ARMExecutionDepsFix : public ExecutionDepsFix {
+public:
+ static char ID;
+ ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {}
+ StringRef getPassName() const override {
+ return "ARM Execution Dependency Fix";
+ }
+};
+char ARMExecutionDepsFix::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix",
+ "ARM Execution Dependency Fix", false, false)
TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new ARMPassConfig(this, PM);
+ return new ARMPassConfig(*this, PM);
}
void ARMPassConfig::addIRPasses() {
if (TM->Options.ThreadModel == ThreadModel::Single)
addPass(createLowerAtomicPass());
else
- addPass(createAtomicExpandPass(TM));
+ addPass(createAtomicExpandPass());
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
@@ -438,7 +377,7 @@ void ARMPassConfig::addIRPasses() {
// Match interleaved memory accesses to ldN/stN intrinsics.
if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createInterleavedAccessPass(TM));
+ addPass(createInterleavedAccessPass());
}
bool ARMPassConfig::addPreISel() {
@@ -508,7 +447,7 @@ void ARMPassConfig::addPreSched2() {
if (EnableARMLoadStoreOpt)
addPass(createARMLoadStoreOptimizationPass());
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(new ARMExecutionDepsFix());
}
// Expand some pseudo instructions into multiple instructions to allow
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index c6b70b9..22ce949 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -14,10 +14,14 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
-#include "ARMInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
@@ -32,7 +36,6 @@ public:
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- ARMSubtarget Subtarget;
bool isLittle;
mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
@@ -43,8 +46,10 @@ public:
CodeGenOpt::Level OL, bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ // The no argument getSubtargetImpl, while it exists on some targets, is
+ // deprecated and should not be used.
+ const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
/// \brief Get the TargetIRAnalysis for this target.
@@ -56,23 +61,15 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-};
-/// ARM target machine.
-///
-class ARMTargetMachine : public ARMBaseTargetMachine {
- virtual void anchor();
- public:
- ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle);
+ bool isMachineVerifierClean() const override {
+ return false;
+ }
};
-/// ARM little endian target machine.
+/// ARM/Thumb little endian target machine.
///
-class ARMLETargetMachine : public ARMTargetMachine {
- void anchor() override;
+class ARMLETargetMachine : public ARMBaseTargetMachine {
public:
ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -80,10 +77,9 @@ public:
CodeGenOpt::Level OL);
};
-/// ARM big endian target machine.
+/// ARM/Thumb big endian target machine.
///
-class ARMBETargetMachine : public ARMTargetMachine {
- void anchor() override;
+class ARMBETargetMachine : public ARMBaseTargetMachine {
public:
ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -91,41 +87,6 @@ public:
CodeGenOpt::Level OL);
};
-/// Thumb target machine.
-/// Due to the way architectures are handled, this represents both
-/// Thumb-1 and Thumb-2.
-///
-class ThumbTargetMachine : public ARMBaseTargetMachine {
- virtual void anchor();
-public:
- ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle);
-};
-
-/// Thumb little endian target machine.
-///
-class ThumbLETargetMachine : public ThumbTargetMachine {
- void anchor() override;
-public:
- ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
-/// Thumb big endian target machine.
-///
-class ThumbBETargetMachine : public ThumbTargetMachine {
- void anchor() override;
-public:
- ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index 625c428..88bab64 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -8,16 +8,19 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/Mangler.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
using namespace llvm;
using namespace dwarf;
@@ -27,9 +30,9 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- const ARMTargetMachine &ARM_TM = static_cast<const ARMTargetMachine &>(TM);
- bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
- genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+ const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
+ bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -40,16 +43,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
AttributesSection =
getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
-
- // Make code section unreadable when in execute-only mode
- if (genExecuteOnly) {
- unsigned Type = ELF::SHT_PROGBITS;
- unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
- // Since we cannot modify flags for an existing section, we create a new
- // section with the right flags, and use 0 as the unique ID for
- // execute-only text
- TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
- }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
@@ -71,21 +64,27 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
getContext());
}
-MCSection *
-ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO,
- SectionKind SK, const TargetMachine &TM) const {
+static bool isExecuteOnlyFunction(const GlobalObject *GO, SectionKind SK,
+ const TargetMachine &TM) {
+ if (const Function *F = dyn_cast<Function>(GO))
+ if (TM.getSubtarget<ARMSubtarget>(*F).genExecuteOnly() && SK.isText())
+ return true;
+ return false;
+}
+
+MCSection *ARMElfTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Set execute-only access for the explicit section
- if (genExecuteOnly && SK.isText())
+ if (isExecuteOnlyFunction(GO, SK, TM))
SK = SectionKind::getExecuteOnly();
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
}
-MCSection *
-ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
- SectionKind SK, const TargetMachine &TM) const {
+MCSection *ARMElfTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Place the global in the execute-only text section
- if (genExecuteOnly && SK.isText())
+ if (isExecuteOnlyFunction(GO, SK, TM))
SK = SectionKind::getExecuteOnly();
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM);
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index 24e755d..bd7aa1c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -11,19 +11,17 @@
#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCExpr.h"
namespace llvm {
-class MCContext;
-class TargetMachine;
-
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
- mutable bool genExecuteOnly = false;
protected:
- const MCSection *AttributesSection;
+ const MCSection *AttributesSection = nullptr;
+
public:
ARMElfTargetObjectFile()
- : TargetLoweringObjectFileELF(), AttributesSection(nullptr) {
+ : TargetLoweringObjectFileELF() {
PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31;
}
@@ -47,4 +45,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2b6b36b..51b0fed 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,6 +15,24 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
+bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // To inline a callee, all features not in the whitelist must match exactly.
+ bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
+ (CalleeBits & ~InlineFeatureWhitelist);
+ // For features in the whitelist, the callee's features must be a subset of
+ // the callers'.
+ bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
+ (CalleeBits & InlineFeatureWhitelist);
+ return MatchExact && MatchSubset;
+}
+
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
@@ -92,7 +110,8 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
-int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -310,7 +329,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a a vector select gets lowered to vbsl.
@@ -335,7 +355,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return LT.first;
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -504,7 +524,7 @@ int ARMTTIImpl::getArithmeticInstrCost(
}
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
@@ -529,12 +549,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
- Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
- if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
- return Factor;
+ // Accesses having vector types that are a multiple of 128 bits can be
+ // matched to more than one vldN/vstN instruction.
+ if (NumElts % Factor == 0 &&
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 3c83cd9..0695a4e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,9 +33,38 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+ // Currently the following features are excluded from InlineFeatureWhitelist.
+ // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
+ // Depending on whether they are set or unset, different
+ // instructions/registers are available. For example, inlining a callee with
+ // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
+ // fail if the callee uses ARM only instructions, e.g. in inline asm.
+ const FeatureBitset InlineFeatureWhitelist = {
+ ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
+ ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
+ ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
+ ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
+ ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
+ ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
+ ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
+ ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
+ ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
+ ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
+ ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
+ ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
+ ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
+ ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
+ ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
+ ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
+ ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
+ ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
+ ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
+ ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
+ ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
+ ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
+ ARM::FeatureNoNegativeImmediates
+ };
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
@@ -45,6 +74,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
bool enableInterleavedAccessVectorization() { return true; }
/// Floating-point computation using ARMv8 AArch32 Advanced
@@ -82,7 +114,7 @@ public:
return 13;
}
- unsigned getRegisterBitWidth(bool Vector) {
+ unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasNEON())
return 128;
@@ -98,9 +130,11 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -118,7 +152,7 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c243a2d..1129826 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -39,10 +41,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetParser.h"
@@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode(
clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb",
"Warn in ARM, emit implicit ITs in Thumb")));
+static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes",
+ cl::init(false));
+
class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
@@ -540,6 +543,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ // Add build attributes based on the selected target.
+ if (AddBuildAttributes)
+ getTargetStreamer().emitTargetAttributes(STI);
+
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
@@ -915,40 +922,37 @@ public:
int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
return Val != -1;
}
- bool isFBits16() const {
+
+ template<int64_t N, int64_t M>
+ bool isImmediate() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return Value >= 0 && Value <= 16;
+ return Value >= N && Value <= M;
}
- bool isFBits32() const {
+ template<int64_t N, int64_t M>
+ bool isImmediateS4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return Value >= 1 && Value <= 32;
+ return ((Value & 3) == 0) && Value >= N && Value <= M;
+ }
+ bool isFBits16() const {
+ return isImmediate<0, 17>();
+ }
+ bool isFBits32() const {
+ return isImmediate<1, 33>();
}
bool isImm8s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
+ return isImmediateS4<-1020, 1020>();
}
bool isImm0_1020s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
+ return isImmediateS4<0, 1020>();
}
bool isImm0_508s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
+ return isImmediateS4<0, 508>();
}
bool isImm0_508s4Neg() const {
if (!isImm()) return false;
@@ -958,27 +962,6 @@ public:
// explicitly exclude zero. we want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508;
}
- bool isImm0_239() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 240;
- }
- bool isImm0_255() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 256;
- }
- bool isImm0_4095() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 4096;
- }
bool isImm0_4095Neg() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -986,145 +969,17 @@ public:
int64_t Value = -CE->getValue();
return Value > 0 && Value < 4096;
}
- bool isImm0_1() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 2;
- }
- bool isImm0_3() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 4;
- }
bool isImm0_7() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 8;
- }
- bool isImm0_15() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 16;
- }
- bool isImm0_31() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 32;
- }
- bool isImm0_63() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 64;
- }
- bool isImm8() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 8;
- }
- bool isImm16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 16;
- }
- bool isImm32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 32;
- }
- bool isShrImm8() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 8;
- }
- bool isShrImm16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 16;
- }
- bool isShrImm32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 32;
- }
- bool isShrImm64() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 64;
- }
- bool isImm1_7() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 8;
- }
- bool isImm1_15() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 16;
- }
- bool isImm1_31() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 32;
+ return isImmediate<0, 7>();
}
bool isImm1_16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 17;
+ return isImmediate<1, 16>();
}
bool isImm1_32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 33;
+ return isImmediate<1, 32>();
}
- bool isImm0_32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 33;
- }
- bool isImm0_65535() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 65536;
+ bool isImm8_255() const {
+ return isImmediate<8, 255>();
}
bool isImm256_65535Expr() const {
if (!isImm()) return false;
@@ -1145,32 +1000,16 @@ public:
return Value >= 0 && Value < 65536;
}
bool isImm24bit() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value <= 0xffffff;
+ return isImmediate<0, 0xffffff + 1>();
}
bool isImmThumbSR() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 33;
+ return isImmediate<1, 33>();
}
bool isPKHLSLImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 32;
+ return isImmediate<0, 32>();
}
bool isPKHASRImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 32;
+ return isImmediate<0, 33>();
}
bool isAdrLabel() const {
// If we have an immediate that's not a constant, treat it as a label
@@ -1187,6 +1026,15 @@ public:
ARM_AM::getSOImmVal(-Value) != -1);
}
bool isT2SOImm() const {
+ // If we have an immediate that's not a constant, treat it as an expression
+ // needing a fixup.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) {
+ // We want to avoid matching :upper16: and :lower16: as we want these
+ // expressions to match in isImm0_65535Expr()
+ const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(getImm());
+ return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16));
+ }
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -1245,6 +1093,20 @@ public:
return ARM_AM::getSOImmVal(Value) == -1 &&
ARM_AM::getSOImmVal(-Value) != -1;
}
+ bool isThumbModImmNeg1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int32_t Value = -(int32_t)CE->getValue();
+ return 0 < Value && Value < 8;
+ }
+ bool isThumbModImmNeg8_255() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int32_t Value = -(int32_t)CE->getValue();
+ return 7 < Value && Value < 256;
+ }
bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; }
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
@@ -2035,6 +1897,20 @@ public:
Inst.addOperand(MCOperand::createImm(Enc));
}
+ void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Val = -CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Val = -CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
void addBitfieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Munge the lsb/width into a bitfield mask.
@@ -2141,7 +2017,7 @@ public:
// The operand is actually a t2_so_imm, but we have its bitwise
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::createImm(~CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue()));
}
void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
@@ -2149,7 +2025,7 @@ public:
// The operand is actually a t2_so_imm, but we have its
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::createImm(-CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue()));
}
void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
@@ -4330,7 +4206,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// If some specific flag is already set, it means that some letter is
// present more than once, this is not acceptable.
- if (FlagsVal == ~0U || (FlagsVal & Flag))
+ if (Flag == ~0U || (FlagsVal & Flag))
return MatchOperand_NoMatch;
FlagsVal |= Flag;
}
@@ -5373,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Fall though for the Identifier case that is not a register or a
// special name.
+ LLVM_FALLTHROUGH;
}
case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
case AsmToken::Integer: // things like 1f and 2b as a branch targets
@@ -5484,7 +5361,8 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
enum {
COFF = (1 << MCObjectFileInfo::IsCOFF),
ELF = (1 << MCObjectFileInfo::IsELF),
- MACHO = (1 << MCObjectFileInfo::IsMachO)
+ MACHO = (1 << MCObjectFileInfo::IsMachO),
+ WASM = (1 << MCObjectFileInfo::IsWasm),
};
static const struct PrefixEntry {
const char *Spelling;
@@ -5518,6 +5396,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
case MCObjectFileInfo::IsCOFF:
CurrentFormat = COFF;
break;
+ case MCObjectFileInfo::IsWasm:
+ CurrentFormat = WASM;
+ break;
}
if (~Prefix->SupportedFormats & CurrentFormat) {
@@ -6301,10 +6182,6 @@ bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,
else if (ListContainsPC && ListContainsLR)
return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
"PC and LR may not be in the register list simultaneously");
- else if (inITBlock() && !lastInITBlock() && ListContainsPC)
- return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
- "instruction must be outside of IT block or the last "
- "instruction in an IT block");
return false;
}
@@ -6366,6 +6243,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Warning(Loc, "predicated instructions should be in IT block");
}
+ // PC-setting instructions in an IT block, but not the last instruction of
+ // the block, are UNPREDICTABLE.
+ if (inExplicitITBlock() && !lastInITBlock() && isITBlockTerminator(Inst)) {
+ return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block");
+ }
+
const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
case ARM::LDRD:
@@ -6676,6 +6559,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
break;
}
case ARM::MOVi16:
+ case ARM::MOVTi16:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
{
@@ -6977,6 +6861,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
bool ARMAsmParser::processInstruction(MCInst &Inst,
const OperandVector &Operands,
MCStreamer &Out) {
+ // Check if we have the wide qualifier, because if it's present we
+ // must avoid selecting a 16-bit thumb instruction.
+ bool HasWideQualifier = false;
+ for (auto &Op : Operands) {
+ ARMOperand &ARMOp = static_cast<ARMOperand&>(*Op);
+ if (ARMOp.isToken() && ARMOp.getToken() == ".w") {
+ HasWideQualifier = true;
+ break;
+ }
+ }
+
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -7056,8 +6951,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Select the narrow version if the immediate will fit.
if (Inst.getOperand(1).getImm() > 0 &&
Inst.getOperand(1).getImm() <= 0xff &&
- !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
+ !HasWideQualifier)
Inst.setOpcode(ARM::tLDRpci);
else
Inst.setOpcode(ARM::t2LDRpci);
@@ -7088,10 +6982,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
else if (Inst.getOpcode() == ARM::t2LDRConstPool)
TmpInst.setOpcode(ARM::t2LDRpci);
const ARMOperand &PoolOperand =
- (static_cast<ARMOperand &>(*Operands[2]).isToken() &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ?
- static_cast<ARMOperand &>(*Operands[4]) :
- static_cast<ARMOperand &>(*Operands[3]);
+ (HasWideQualifier ?
+ static_cast<ARMOperand &>(*Operands[4]) :
+ static_cast<ARMOperand &>(*Operands[3]));
const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm();
// If SubExprVal is a constant we may be able to use a MOV
if (isa<MCConstantExpr>(SubExprVal) &&
@@ -8232,10 +8125,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::t2LSRri:
case ARM::t2ASRri: {
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
- !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8269,7 +8161,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg()) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
- inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) &&
+ !HasWideQualifier)
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
@@ -8303,27 +8196,43 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
bool isNarrow = false;
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
- inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) &&
+ !HasWideQualifier)
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
- switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
- default: llvm_unreachable("unexpected opcode!");
- case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
- case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
- case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
- case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
- case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
- }
+ unsigned Shift = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ bool isMov = false;
+ // MOV rd, rm, LSL #0 is actually a MOV instruction
+ if (Shift == ARM_AM::lsl && Amount == 0) {
+ isMov = true;
+ // The 16-bit encoding of MOV rd, rm, LSL #N is explicitly encoding T2 of
+ // MOV (register) in the ARMv8-A and ARMv8-M manuals, and immediate 0 is
+ // unpredictable in an IT block so the 32-bit encoding T3 has to be used
+ // instead.
+ if (inITBlock()) {
+ isNarrow = false;
+ }
+ newOpc = isNarrow ? ARM::tMOVSr : ARM::t2MOVr;
+ } else {
+ switch(Shift) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ }
if (Amount == 32) Amount = 0;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
- if (isNarrow)
+ if (isNarrow && !isMov)
TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- if (newOpc != ARM::t2RRX)
+ if (newOpc != ARM::t2RRX && !isMov)
TmpInst.addOperand(MCOperand::createImm(Amount));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
@@ -8515,11 +8424,10 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// wide encoding wasn't explicit.
if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
- (unsigned)Inst.getOperand(2).getImm() > 255 ||
- ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
- (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ (Inst.getOperand(2).isImm() &&
+ (unsigned)Inst.getOperand(2).getImm() > 255) ||
+ Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) ||
+ HasWideQualifier)
break;
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
@@ -8548,8 +8456,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
if (!Transform ||
Inst.getOperand(5).getReg() != 0 ||
- (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ HasWideQualifier)
break;
MCInst TmpInst;
TmpInst.setOpcode(ARM::tADDhirr);
@@ -8667,12 +8574,10 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// If we can use the 16-bit encoding and the user didn't explicitly
// request the 32-bit variant, transform it here.
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- (unsigned)Inst.getOperand(1).getImm() <= 255 &&
- ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
- Inst.getOperand(4).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ (Inst.getOperand(1).isImm() &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255) &&
+ Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
// The operands aren't in the same order for tMOVi8...
MCInst TmpInst;
TmpInst.setOpcode(ARM::tMOVi8);
@@ -8693,8 +8598,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ !HasWideQualifier) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
@@ -8716,8 +8620,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == 0 &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Illegal opcode!");
@@ -8816,11 +8719,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg())) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
- ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
- !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
- ".w"))) {
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8856,11 +8756,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(2).getReg())) &&
(Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
- ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
- !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
- ".w"))) {
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8918,6 +8815,9 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
inITBlock())
return Match_RequiresNotITBlock;
+ // LSL with zero immediate is not allowed in an IT block
+ if (Opc == ARM::tLSLri && Inst.getOperand(3).getImm() == 0 && inITBlock())
+ return Match_RequiresNotITBlock;
} else if (isThumbOne()) {
// Some high-register supporting Thumb1 encodings only allow both registers
// to be from r0-r7 when in Thumb2.
@@ -8932,6 +8832,22 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_RequiresV6;
}
+ // Before ARMv8 the rules for when SP is allowed in t2MOVr are more complex
+ // than the loop below can handle, so it uses the GPRnopc register class and
+ // we do SP handling here.
+ if (Opc == ARM::t2MOVr && !hasV8Ops())
+ {
+ // SP as both source and destination is not allowed
+ if (Inst.getOperand(0).getReg() == ARM::SP &&
+ Inst.getOperand(1).getReg() == ARM::SP)
+ return Match_RequiresV8;
+ // When flags-setting SP as either source or destination is not allowed
+ if (Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (Inst.getOperand(0).getReg() == ARM::SP ||
+ Inst.getOperand(1).getReg() == ARM::SP))
+ return Match_RequiresV8;
+ }
+
for (unsigned I = 0; I < MCID.NumOperands; ++I)
if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
// rGPRRegClass excludes PC, and also excluded SP before ARMv8
@@ -8945,7 +8861,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
namespace llvm {
-template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
+template <> inline bool IsCPSRDead<MCInst>(const MCInst *Instr) {
return true; // In an assembly source, no need to second-guess
}
}
@@ -8975,6 +8891,7 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
// operands. We only care about Thumb instructions here, as ARM instructions
// obviously can't be in an IT block.
switch (Inst.getOpcode()) {
+ case ARM::tLDMIA:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB:
@@ -9076,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
return PlainMatchResult;
}
+std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS);
+
static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
@@ -9088,6 +9007,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
PendConditionalInstruction, Out);
+ SMLoc ErrorLoc;
+ if (ErrorInfo < Operands.size()) {
+ ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+
switch (MatchResult) {
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
@@ -9162,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_MnemonicFail:
- return Error(IDLoc, "invalid instruction",
+ case Match_MnemonicFail: {
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = ARMMnemonicSpellCheck(
+ ((ARMOperand &)*Operands[0]).getToken(), FBS);
+ return Error(IDLoc, "invalid instruction" + Suggestion,
((ARMOperand &)*Operands[0]).getLocRange());
+ }
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
@@ -9177,16 +9107,52 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv8 or later");
case Match_RequiresFlagSetting:
return Error(IDLoc, "no flag-preserving variant of this instruction available");
- case Match_ImmRange0_15: {
- SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ case Match_ImmRange0_1:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,1]");
+ case Match_ImmRange0_3:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,3]");
+ case Match_ImmRange0_7:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,7]");
+ case Match_ImmRange0_15:
return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
- }
- case Match_ImmRange0_239: {
- SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ case Match_ImmRange0_31:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,31]");
+ case Match_ImmRange0_32:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,32]");
+ case Match_ImmRange0_63:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,63]");
+ case Match_ImmRange0_239:
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
- }
+ case Match_ImmRange0_255:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,255]");
+ case Match_ImmRange0_4095:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,4095]");
+ case Match_ImmRange0_65535:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,65535]");
+ case Match_ImmRange1_7:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,7]");
+ case Match_ImmRange1_8:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,8]");
+ case Match_ImmRange1_15:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,15]");
+ case Match_ImmRange1_16:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,16]");
+ case Match_ImmRange1_31:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,31]");
+ case Match_ImmRange1_32:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,32]");
+ case Match_ImmRange1_64:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,64]");
+ case Match_ImmRange8_8:
+ return Error(ErrorLoc, "immediate operand must be 8.");
+ case Match_ImmRange16_16:
+ return Error(ErrorLoc, "immediate operand must be 16.");
+ case Match_ImmRange32_32:
+ return Error(ErrorLoc, "immediate operand must be 32.");
+ case Match_ImmRange256_65535:
+ return Error(ErrorLoc, "immediate operand must be in the range [256,65535]");
+ case Match_ImmRange0_16777215:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]");
case Match_AlignedMemoryRequiresNone:
case Match_DupAlignedMemoryRequiresNone:
case Match_AlignedMemoryRequires16:
@@ -10244,8 +10210,8 @@ static const struct {
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
- {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
+ { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
+ {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ac3d8c7..5ab236b 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <vector>
using namespace llvm;
@@ -31,6 +34,7 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
// Handles the condition code status of instructions in IT blocks
class ITStatus
{
@@ -81,9 +85,7 @@ namespace {
private:
std::vector<unsigned char> ITStates;
};
-}
-namespace {
/// ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -91,7 +93,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() override {}
+ ~ARMDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -106,7 +108,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() override {}
+ ~ThumbDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -118,7 +120,8 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
-}
+
+} // end anonymous namespace
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -135,7 +138,6 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
llvm_unreachable("Invalid DecodeStatus!");
}
-
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -319,7 +321,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
@@ -395,8 +396,9 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+
#include "ARMGenDisassemblerTables.inc"
static MCDisassembler *createARMDisassembler(const Target &T,
@@ -416,8 +418,7 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address, raw_ostream &OS,
raw_ostream &CS,
uint32_t Insn,
- DecodeStatus Result)
-{
+ DecodeStatus Result) {
switch (MI.getOpcode()) {
case ARM::HVC: {
// HVC is undefined if condition = 0xf otherwise upredictable
@@ -461,74 +462,39 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
}
- // VFP and NEON instructions, similarly, are shared between ARM
- // and Thumb modes.
- Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
-
- Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
-
- Result =
- decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
-
- Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
-
- Result =
- decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
+ struct DecodeTable {
+ const uint8_t *P;
+ bool DecodePred;
+ };
- Result =
- decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
+ const DecodeTable Tables[] = {
+ {DecoderTableVFP32, false}, {DecoderTableVFPV832, false},
+ {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true},
+ {DecoderTableNEONDup32, true}, {DecoderTablev8NEON32, false},
+ {DecoderTablev8Crypto32, false},
+ };
- Result =
- decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
+ for (auto Table : Tables) {
+ Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (Table.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return Result;
+ }
}
- Size = 0;
+ Size = 4;
return MCDisassembler::Fail;
}
namespace llvm {
+
extern const MCInstrDesc ARMInsts[];
-}
+
+} // end namespace llvm
/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
/// immediate Value in the MCInst. The immediate Value has had any PC
@@ -859,7 +825,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
-
extern "C" void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(),
createARMDisassembler);
@@ -1056,7 +1021,6 @@ static const uint16_t QPRDecoderTable[] = {
ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
};
-
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31 || (RegNo & 1) != 0)
@@ -1676,7 +1640,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt2 == 15)
S = MCDisassembler::SoftFail;
break;
@@ -1693,7 +1657,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRH:
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt == 15)
S = MCDisassembler::SoftFail;
break;
@@ -1711,7 +1675,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRSB:
case ARM::LDRSB_PRE:
case ARM::LDRSB_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt == 15)
S = MCDisassembler::SoftFail;
break;
@@ -2309,7 +2273,6 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3748,7 +3711,6 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4073,7 +4035,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder){
+ uint64_t Address, const void *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
@@ -4081,7 +4043,8 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder){
+ uint64_t Address,
+ const void *Decoder) {
// Val is passed in as S:J1:J2:imm10:imm11
// Note no trailing zero after imm11. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
@@ -4247,7 +4210,8 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder){
+ uint64_t Address,
+ const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -4323,7 +4287,6 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4506,7 +4469,6 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4637,7 +4599,6 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4771,7 +4732,6 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -5266,9 +5226,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
-
DecodeStatus S = MCDisassembler::Success;
unsigned CRm = fieldFromInstruction(Val, 0, 4);
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 3667952..57b9136 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -20,7 +20,15 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -73,7 +81,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
unsigned Opcode = MI->getOpcode();
switch (Opcode) {
-
// Check for MOVs and print canonical forms, instead.
case ARM::MOVsr: {
// FIXME: Thumb variants?
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 9d80eed..86873a3 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -235,4 +235,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index a58d5b3..a77df7a 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -7,15 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMAsmBackend.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMAsmBackendDarwin.h"
#include "MCTargetDesc/ARMAsmBackendELF.h"
#include "MCTargetDesc/ARMAsmBackendWinCOFF.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -31,10 +33,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_movt_hi16", 0, 20, 0},
{"fixup_t2_movw_lo16", 0, 20, 0},
{"fixup_arm_mod_imm", 0, 12, 0},
+ {"fixup_t2_so_imm", 0, 26, 0},
};
const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_movt_hi16", 12, 20, 0},
{"fixup_t2_movw_lo16", 12, 20, 0},
{"fixup_arm_mod_imm", 20, 12, 0},
+ {"fixup_t2_so_imm", 26, 6, 0},
};
if (Kind < FirstTargetFixupKind)
@@ -356,14 +358,30 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
return Value;
}
-unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx,
- bool IsLittleEndian,
- bool IsResolved) const {
+unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target, uint64_t Value,
+ bool IsResolved, MCContext &Ctx,
+ bool IsLittleEndian) const {
unsigned Kind = Fixup.getKind();
+
+ // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT
+ // and .word relocations they put the Thumb bit into the addend if possible.
+ // Other relocation types don't want this bit though (branches couldn't encode
+ // it if it *was* present, and no other relocations exist) and it can
+ // interfere with checking valid expressions.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) &&
+ (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
+ Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
+ Kind == ARM::fixup_t2_movt_hi16))
+ Value |= 1;
+ }
+
switch (Kind) {
default:
- llvm_unreachable("Unknown fixup kind!");
+ Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
+ return 0;
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -373,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case FK_SecRel_4:
return Value;
case ARM::fixup_arm_movt_hi16:
- if (!IsPCRel)
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
@@ -385,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
case ARM::fixup_t2_movt_hi16:
- if (!IsPCRel)
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
@@ -412,8 +430,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
isAdd = false;
}
- if (Ctx && Value >= 4096) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 4096) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -433,8 +451,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
opc = 2; // 0b0010
}
- if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
// Encode the immediate and shift the opcode into place.
@@ -502,6 +520,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_thumb_bl: {
+ // FIXME: We get both thumb1 and thumb2 in here, so we can only check for
+ // the less strict thumb2 value.
+ if (!isInt<26>(Value - 4)) {
+ Ctx.reportError(Fixup.getLoc(), "Relocation out of range");
+ return 0;
+ }
+
// The value doesn't encode the low bit (always zero) and is offset by
// four. The 32-bit immediate value is encoded as
// imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
@@ -541,8 +566,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
//
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
- if (Ctx && Value % 4 != 0) {
- Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination");
+ if (Value % 4 != 0) {
+ Ctx.reportError(Fixup.getLoc(), "misaligned ARM call destination");
return 0;
}
@@ -568,10 +593,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_thumb_cp:
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
// could have an error on our hands.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
@@ -581,8 +606,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// CB instructions can only branch to offsets in [4, 126] in multiples of 2
// so ensure that the raw value LSB is zero and it lies in [2, 130].
// An offset of 2 will be relaxed to a NOP.
- if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
// Offset by 4 and don't encode the lower bit, which is always 0.
@@ -592,21 +617,21 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] &&
- !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
+ !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
@@ -620,8 +645,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
@@ -641,8 +666,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -667,13 +692,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// These values don't encode the low bit since it's always zero.
- if (Ctx && (Value & 1)) {
- Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup");
+ if (Value & 1) {
+ Ctx.reportError(Fixup.getLoc(), "invalid value for this fixup");
return 0;
}
Value >>= 1;
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -687,38 +712,38 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_mod_imm:
Value = ARM_AM::getSOImmVal(Value);
- if (Ctx && Value >> 12) {
- Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value");
+ if (Value >> 12) {
+ Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value");
return 0;
}
return Value;
+ case ARM::fixup_t2_so_imm: {
+ Value = ARM_AM::getT2SOImmVal(Value);
+ if ((int64_t)Value < 0) {
+ Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value");
+ return 0;
+ }
+ // Value will contain a 12-bit value broken up into a 4-bit shift in bits
+ // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate
+ // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit
+ // 10 of the upper half-word and imm3 is placed at 14:12 of the lower
+ // half-word.
+ uint64_t EncValue = 0;
+ EncValue |= (Value & 0x800) << 15;
+ EncValue |= (Value & 0x700) << 4;
+ EncValue |= (Value & 0xff);
+ return swapHalfWords(EncValue, IsLittleEndian);
+ }
}
}
-void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
+bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
- // MachO (the only user of "Value") tries to make .o files that look vaguely
- // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
- // into the addend if possible. Other relocation types don't want this bit
- // though (branches couldn't encode it if it *was* present, and no other
- // relocations exist) and it can interfere with checking valid expressions.
- if ((unsigned)Fixup.getKind() == FK_Data_4 ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 ||
- (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 ||
- (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) {
- if (Sym) {
- if (Asm.isThumbFunc(Sym))
- Value |= 1;
- }
- }
- if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ const unsigned FixupKind = Fixup.getKind();
+ if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
// If the symbol is external the linker will handle it.
@@ -726,23 +751,32 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// If the symbol is out of range, produce a relocation and hope the
// linker can handle it. GNU AS produces an error in this case.
- if (Sym->isExternal() || Value >= 0x400004)
- IsResolved = false;
+ if (Sym->isExternal())
+ return true;
+ }
+ // Create relocations for unconditional branches to function symbols with
+ // different execution mode in ELF binaries.
+ if (Sym && Sym->isELF()) {
+ unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
+ if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
+ if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
+ return true;
+ if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
+ FixupKind == ARM::fixup_arm_thumb_bl ||
+ FixupKind == ARM::fixup_t2_condbranch ||
+ FixupKind == ARM::fixup_t2_uncondbranch))
+ return true;
+ }
}
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
// symbol's thumb-ness to get interworking right.
- if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
- IsResolved = false;
-
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value aren't invalid.
- (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
- IsLittleEndian, IsResolved);
+ if (A && (FixupKind == ARM::fixup_arm_thumb_blx ||
+ FixupKind == ARM::fixup_arm_blx ||
+ FixupKind == ARM::fixup_arm_uncondbl ||
+ FixupKind == ARM::fixup_arm_condbl))
+ return true;
+ return false;
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -788,6 +822,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_so_imm:
return 4;
case FK_SecRel_2:
@@ -840,28 +875,31 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_arm_mod_imm:
+ case ARM::fixup_t2_so_imm:
// Instruction size is 4 bytes.
return 4;
}
}
-void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsResolved) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value =
- adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
+ MCContext &Ctx = Asm.getContext();
+ Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx,
+ IsLittleEndian);
if (!Value)
return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FullSizeBytes;
if (!IsLittleEndian) {
FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
- assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!");
+ assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 84caaac..0237496 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -38,19 +38,17 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
- /// processFixupValue - Target hook to process the literal value of a fixup
- /// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-
- unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
- MCContext *Ctx, bool IsLittleEndian,
- bool IsResolved) const;
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
+ unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, uint64_t Value,
+ bool IsResolved, MCContext &Ctx,
+ bool IsLittleEndian) const;
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 09dc017..bd729fa 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -11,7 +11,7 @@
#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
#include "ARMAsmBackend.h"
-#include "llvm/Support/MachO.h"
+#include "llvm/BinaryFormat/MachO.h"
namespace llvm {
class ARMAsmBackendDarwin : public ARMAsmBackend {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 088b420..92e553f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -291,7 +291,11 @@ namespace ARMII {
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x1f,
+ MO_OPTION_MASK = 0x0f,
+
+ /// MO_SBREL - On a symbol operand, this represents a static base relative
+ /// relocation. Used in movw and movt instructions.
+ MO_SBREL = 0x10,
/// MO_DLLIMPORT - On a symbol operand, this represents that the reference
/// to the symbol is for an import stub. This is used for DLL import
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 6f19754..59f31be 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,32 +7,32 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMFixupKinds.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
namespace {
+
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
enum { DefaultEABIVersion = 0x05000000U };
- unsigned GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
+ unsigned GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, MCContext &Ctx) const;
public:
ARMELFObjectWriter(uint8_t OSABI);
- ~ARMELFObjectWriter() override;
+ ~ARMELFObjectWriter() override = default;
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
@@ -40,15 +40,14 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-}
+
+} // end anonymous namespace
ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
-ARMELFObjectWriter::~ARMELFObjectWriter() {}
-
bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use a
@@ -70,19 +69,20 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- return GetRelocTypeInner(Target, Fixup, IsPCRel);
+ return GetRelocTypeInner(Target, Fixup, IsPCRel, Ctx);
}
unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
- bool IsPCRel) const {
+ bool IsPCRel,
+ MCContext &Ctx) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Type = 0;
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
default:
- report_fatal_error("unsupported relocation on symbol");
+ Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
@@ -161,7 +161,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default:
- report_fatal_error("unsupported relocation on symbol");
+ Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
@@ -270,10 +270,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
break;
case ARM::fixup_t2_movt_hi16:
- Type = ELF::R_ARM_THM_MOVT_ABS;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_THM_MOVT_BREL;
+ break;
+ }
break;
case ARM::fixup_t2_movw_lo16:
- Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_THM_MOVW_BREL_NC;
+ break;
+ }
break;
}
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index f6bb35d..93f4006 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -15,8 +15,13 @@
#include "ARMRegisterInfo.h"
#include "ARMUnwindOpAsm.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -24,25 +29,32 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <string>
using namespace llvm;
@@ -101,16 +113,21 @@ ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S,
bool VerboseAsm)
: ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter),
IsVerboseAsm(VerboseAsm) {}
+
void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+
void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
OS << "\t.personality " << Personality->getName() << '\n';
}
+
void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) {
OS << "\t.personalityindex " << Index << '\n';
}
+
void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+
void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
int64_t Offset) {
OS << "\t.setfp\t";
@@ -121,6 +138,7 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
OS << ", #" << Offset;
OS << '\n';
}
+
void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
assert((Reg != ARM::SP && Reg != ARM::PC) &&
"the operand of .movsp cannot be either sp or pc");
@@ -131,9 +149,11 @@ void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
OS << ", #" << Offset;
OS << '\n';
}
+
void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
OS << "\t.pad\t#" << Offset << '\n';
}
+
void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector) {
assert(RegList.size() && "RegList should not be empty");
@@ -151,8 +171,9 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
OS << "}\n";
}
-void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
-}
+
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {}
+
void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value);
if (IsVerboseAsm) {
@@ -162,6 +183,7 @@ void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
StringRef String) {
switch (Attribute) {
@@ -179,6 +201,7 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
@@ -194,20 +217,25 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
+
void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
+
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
+
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
-void ARMTargetAsmStreamer::finishAttributeSection() {
-}
+
+void ARMTargetAsmStreamer::finishAttributeSection() {}
+
void
ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
@@ -274,12 +302,12 @@ private:
};
StringRef CurrentVendor;
- unsigned FPU;
- unsigned Arch;
- unsigned EmittedArch;
+ unsigned FPU = ARM::FK_INVALID;
+ unsigned Arch = ARM::AK_INVALID;
+ unsigned EmittedArch = ARM::AK_INVALID;
SmallVector<AttributeItem, 64> Contents;
- MCSection *AttributeSection;
+ MCSection *AttributeSection = nullptr;
AttributeItem *getAttributeItem(unsigned Attribute) {
for (size_t i = 0; i < Contents.size(); ++i)
@@ -393,9 +421,7 @@ private:
public:
ARMTargetELFStreamer(MCStreamer &S)
- : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
- Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID),
- AttributeSection(nullptr) {}
+ : ARMTargetStreamer(S), CurrentVendor("aeabi") {}
};
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -416,12 +442,11 @@ public:
ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
- MappingSymbolCounter(0), LastEMS(EMS_None) {
+ : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) {
EHReset();
}
- ~ARMELFStreamer() {}
+ ~ARMELFStreamer() override = default;
void FinishImpl() override;
@@ -439,20 +464,21 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
- // We have to keep track of the mapping symbol state of any sections we
- // use. Each one should start off as EMS_None, which is provided as the
- // default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection().first] = LastEMS;
- LastEMS = LastMappingSymbols.lookup(Section);
-
+ LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
MCELFStreamer::ChangeSection(Section, Subsection);
+ auto LastMappingSymbol = LastMappingSymbols.find(Section);
+ if (LastMappingSymbol != LastMappingSymbols.end()) {
+ LastEMSInfo = std::move(LastMappingSymbol->second);
+ return;
+ }
+ LastEMSInfo.reset(new ElfMappingSymbolInfo(SMLoc(), nullptr, 0));
}
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst& Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
@@ -507,15 +533,25 @@ public:
MCELFStreamer::EmitBytes(Data);
}
+ void FlushPendingMappingSymbol() {
+ if (!LastEMSInfo->hasInfo())
+ return;
+ ElfMappingSymbolInfo *EMS = LastEMSInfo.get();
+ EmitMappingSymbol("$d", EMS->Loc, EMS->F, EMS->Offset);
+ EMS->resetInfo();
+ }
+
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
- if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
+ if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) {
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
getContext().reportError(Loc, "relocated expression must be 32-bit");
return;
}
+ getOrCreateDataFragment();
+ }
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
@@ -548,22 +584,54 @@ private:
EMS_Data
};
+ struct ElfMappingSymbolInfo {
+ explicit ElfMappingSymbolInfo(SMLoc Loc, MCFragment *F, uint64_t O)
+ : Loc(Loc), F(F), Offset(O), State(EMS_None) {}
+ void resetInfo() {
+ F = nullptr;
+ Offset = 0;
+ }
+ bool hasInfo() { return F != nullptr; }
+ SMLoc Loc;
+ MCFragment *F;
+ uint64_t Offset;
+ ElfMappingSymbol State;
+ };
+
void EmitDataMappingSymbol() {
- if (LastEMS == EMS_Data) return;
+ if (LastEMSInfo->State == EMS_Data)
+ return;
+ else if (LastEMSInfo->State == EMS_None) {
+ // This is a tentative symbol, it won't really be emitted until it's
+ // actually needed.
+ ElfMappingSymbolInfo *EMS = LastEMSInfo.get();
+ auto *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!DF)
+ return;
+ EMS->Loc = SMLoc();
+ EMS->F = getCurrentFragment();
+ EMS->Offset = DF->getContents().size();
+ LastEMSInfo->State = EMS_Data;
+ return;
+ }
EmitMappingSymbol("$d");
- LastEMS = EMS_Data;
+ LastEMSInfo->State = EMS_Data;
}
void EmitThumbMappingSymbol() {
- if (LastEMS == EMS_Thumb) return;
+ if (LastEMSInfo->State == EMS_Thumb)
+ return;
+ FlushPendingMappingSymbol();
EmitMappingSymbol("$t");
- LastEMS = EMS_Thumb;
+ LastEMSInfo->State = EMS_Thumb;
}
void EmitARMMappingSymbol() {
- if (LastEMS == EMS_ARM) return;
+ if (LastEMSInfo->State == EMS_ARM)
+ return;
+ FlushPendingMappingSymbol();
EmitMappingSymbol("$a");
- LastEMS = EMS_ARM;
+ LastEMSInfo->State = EMS_ARM;
}
void EmitMappingSymbol(StringRef Name) {
@@ -576,6 +644,17 @@ private:
Symbol->setExternal(false);
}
+ void EmitMappingSymbol(StringRef Name, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) {
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ EmitLabel(Symbol, Loc, F);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
+ Symbol->setOffset(Offset);
+ }
+
void EmitThumbFunc(MCSymbol *Func) override {
getAssembler().setIsThumbFunc(Func);
EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
@@ -599,10 +678,12 @@ private:
void EmitFixup(const MCExpr *Expr, MCFixupKind Kind);
bool IsThumb;
- int64_t MappingSymbolCounter;
+ int64_t MappingSymbolCounter = 0;
+
+ DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>>
+ LastMappingSymbols;
- DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
- ElfMappingSymbol LastEMS;
+ std::unique_ptr<ElfMappingSymbolInfo> LastEMSInfo;
// ARM Exception Handling Frame Information
MCSymbol *ExTab;
@@ -618,6 +699,7 @@ private:
SmallVector<uint8_t, 64> Opcodes;
UnwindOpcodeAssembler UnwindOpAsm;
};
+
} // end anonymous namespace
ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
@@ -627,33 +709,42 @@ ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+
void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
getStreamer().emitPersonality(Personality);
}
+
void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) {
getStreamer().emitPersonalityIndex(Index);
}
+
void ARMTargetELFStreamer::emitHandlerData() {
getStreamer().emitHandlerData();
}
+
void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
int64_t Offset) {
getStreamer().emitSetFP(FpReg, SpReg, Offset);
}
+
void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
getStreamer().emitMovSP(Reg, Offset);
}
+
void ARMTargetELFStreamer::emitPad(int64_t Offset) {
getStreamer().emitPad(Offset);
}
+
void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector) {
getStreamer().emitRegSave(RegList, isVector);
}
+
void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset,
const SmallVectorImpl<uint8_t> &Opcodes) {
getStreamer().emitUnwindRaw(Offset, Opcodes);
}
+
void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
assert(!Vendor.empty() && "Vendor cannot be empty.");
@@ -668,25 +759,31 @@ void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
CurrentVendor = Vendor;
}
+
void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
StringRef Value) {
setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
setAttributeItems(Attribute, IntValue, StringValue,
/* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitArch(unsigned Value) {
Arch = Value;
}
+
void ARMTargetELFStreamer::emitObjectArch(unsigned Value) {
EmittedArch = Value;
}
+
void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
@@ -786,9 +883,11 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
}
}
+
void ARMTargetELFStreamer::emitFPU(unsigned Value) {
FPU = Value;
}
+
void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
switch (FPU) {
case ARM::FK_VFP:
@@ -920,6 +1019,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
break;
}
}
+
size_t ARMTargetELFStreamer::calculateContentSize() const {
size_t Result = 0;
for (size_t i = 0; i < Contents.size(); ++i) {
@@ -944,6 +1044,7 @@ size_t ARMTargetELFStreamer::calculateContentSize() const {
}
return Result;
}
+
void ARMTargetELFStreamer::finishAttributeSection() {
// <format-version>
// [ <section-length> "vendor-name"
@@ -1093,9 +1194,9 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
const MCSymbolELF *Group = FnSection.getGroup();
if (Group)
Flags |= ELF::SHF_GROUP;
- MCSectionELF *EHSection =
- getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
- FnSection.getUniqueID(), nullptr, &FnSection);
+ MCSectionELF *EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(),
+ static_cast<const MCSymbolELF *>(&Fn));
assert(EHSection && "Failed to get the required EH section");
@@ -1114,6 +1215,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
SectionKind::getData(), FnStart);
}
+
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr,
@@ -1396,8 +1498,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
- }
-
}
-
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 3fe2302..831589b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -15,55 +15,47 @@
namespace llvm {
namespace ARM {
enum Fixups {
- // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
- // addresses
+ // 12-bit PC relative relocation for symbol addresses
fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
- // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
- // the 16-bit halfwords reordered.
+ // Equivalent to fixup_arm_ldst_pcrel_12, with the 16-bit halfwords reordered.
fixup_t2_ldst_pcrel_12,
- // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
- // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ // 10-bit PC relative relocation for symbol addresses used in
+ // LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
fixup_arm_pcrel_10_unscaled,
- // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
- // used in VFP instructions where the lower 2 bits are not encoded
- // (so it's encoded as an 8-bit immediate).
+ // 10-bit PC relative relocation for symbol addresses used in VFP instructions
+ // where the lower 2 bits are not encoded (so it's encoded as an 8-bit
+ // immediate).
fixup_arm_pcrel_10,
- // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
- // the short-swapped encoding of Thumb2 instructions.
+ // Equivalent to fixup_arm_pcrel_10, accounting for the short-swapped encoding
+ // of Thumb2 instructions.
fixup_t2_pcrel_10,
- // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses
- // used in VFP instructions where bit 0 not encoded (so it's encoded as an
- // 8-bit immediate).
+ // 9-bit PC relative relocation for symbol addresses used in VFP instructions
+ // where bit 0 not encoded (so it's encoded as an 8-bit immediate).
fixup_arm_pcrel_9,
- // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for
- // the short-swapped encoding of Thumb2 instructions.
+ // Equivalent to fixup_arm_pcrel_9, accounting for the short-swapped encoding
+ // of Thumb2 instructions.
fixup_t2_pcrel_9,
- // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
- // addresses where the lower 2 bits are not encoded (so it's encoded as an
- // 8-bit immediate).
+ // 10-bit PC relative relocation for symbol addresses where the lower 2 bits
+ // are not encoded (so it's encoded as an 8-bit immediate).
fixup_thumb_adr_pcrel_10,
- // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
- // instruction.
+ // 12-bit PC relative relocation for the ADR instruction.
fixup_arm_adr_pcrel_12,
- // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
- // instruction.
+ // 12-bit PC relative relocation for the ADR instruction.
fixup_t2_adr_pcrel_12,
- // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
- // instructions.
+ // 24-bit PC relative relocation for conditional branch instructions.
fixup_arm_condbranch,
- // fixup_arm_uncondbranch - 24-bit PC relative relocation for
- // branch instructions. (unconditional)
+ // 24-bit PC relative relocation for branch instructions. (unconditional)
fixup_arm_uncondbranch,
- // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
- // uconditional branch instructions.
+ // 20-bit PC relative relocation for Thumb2 direct conditional branch
+ // instructions.
fixup_t2_condbranch,
- // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
- // branch unconditional branch instructions.
+ // 20-bit PC relative relocation for Thumb2 direct unconditional branch
+ // instructions.
fixup_t2_uncondbranch,
- // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ // 12-bit fixup for Thumb B instructions.
fixup_arm_thumb_br,
// The following fixups handle the ARM BL instructions. These can be
@@ -75,46 +67,48 @@ enum Fixups {
// MachO does not draw a distinction between the two cases, so it will treat
// fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
- // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ // Fixup for unconditional ARM BL instructions.
fixup_arm_uncondbl,
- // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
- // conditionalisation.
+ // Fixup for ARM BL instructions with nontrivial conditionalisation.
fixup_arm_condbl,
- // fixup_arm_blx - Fixup for ARM BLX instructions.
+ // Fixup for ARM BLX instructions.
fixup_arm_blx,
- // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ // Fixup for Thumb BL instructions.
fixup_arm_thumb_bl,
- // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ // Fixup for Thumb BLX instructions.
fixup_arm_thumb_blx,
- // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ // Fixup for Thumb branch instructions.
fixup_arm_thumb_cb,
- // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ // Fixup for Thumb load/store from constant pool instrs.
fixup_arm_thumb_cp,
- // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ // Fixup for Thumb conditional branching instructions.
fixup_arm_thumb_bcc,
// The next two are for the movt/movw pair
// the 16bit imm field are split into imm{15-12} and imm{11-0}
fixup_arm_movt_hi16, // :upper16:
fixup_arm_movw_lo16, // :lower16:
- fixup_t2_movt_hi16, // :upper16:
- fixup_t2_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
- // fixup_arm_mod_imm - Fixup for mod_imm
+ // Fixup for mod_imm
fixup_arm_mod_imm,
+ // Fixup for Thumb2 8-bit rotated operand
+ fixup_t2_so_imm,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 559a4f8..f1f35f4 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -11,22 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
using namespace llvm;
@@ -36,9 +47,8 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
+
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
- void operator=(const ARMMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -47,15 +57,18 @@ public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle)
: MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
}
-
- ~ARMMCCodeEmitter() override {}
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
+ ARMMCCodeEmitter &operator=(const ARMMCCodeEmitter &) = delete;
+ ~ARMMCCodeEmitter() override = default;
bool isThumb(const MCSubtargetInfo &STI) const {
return STI.getFeatureBits()[ARM::ModeThumb];
}
+
bool isThumb2(const MCSubtargetInfo &STI) const {
return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
}
+
bool isTargetMachO(const MCSubtargetInfo &STI) const {
const Triple &TT = STI.getTargetTriple();
return TT.isOSBinFormatMachO();
@@ -200,6 +213,7 @@ public:
case ARM_AM::ib: return 3;
}
}
+
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
@@ -273,7 +287,6 @@ public:
unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(Op);
// We expect MO to be an immediate or an expression,
@@ -326,7 +339,17 @@ public:
unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- unsigned SoImm = MI.getOperand(Op).getImm();
+ const MCOperand &MO = MI.getOperand(Op);
+
+ // Support for fixups (MCFixup)
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ // Fixups resolve to plain values that need to be encoded.
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+ unsigned SoImm = MO.getImm();
unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
return Encoded;
@@ -432,18 +455,6 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new ARMMCCodeEmitter(MCII, Ctx, true);
-}
-
-MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new ARMMCCodeEmitter(MCII, Ctx, false);
-}
-
/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
/// instructions, and rewrite them to their Thumb2 form if we are currently in
/// Thumb2 mode.
@@ -550,7 +561,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
bool ARMMCCodeEmitter::
EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
@@ -1515,7 +1526,7 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
uint32_t v = ~MO.getImm();
uint32_t lsb = countTrailingZeros(v);
uint32_t msb = (32 - countLeadingZeros (v)) - 1;
- assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
return lsb | (msb << 5);
}
@@ -1700,3 +1711,15 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
#include "ARMGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, true);
+}
+
+MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, false);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 9e4d202..b8a8b1f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMMCTargetDesc.h"
#include "ARMBaseInfo.h"
#include "ARMMCAsmInfo.h"
-#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -260,18 +260,37 @@ public:
return false;
int64_t Imm = Inst.getOperand(0).getImm();
- // FIXME: This is not right for thumb.
Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
return true;
}
};
+class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis {
+public:
+ ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const override {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return false;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes.
+ return true;
+ }
+};
+
}
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
return new ARMMCInstrAnalysis(Info);
}
+static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ThumbMCInstrAnalysis(Info);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
@@ -289,9 +308,6 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCSubtargetInfo(*T,
ARM_MC::createARMMCSubtargetInfo);
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
-
TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
@@ -313,6 +329,12 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
}
+ // Register the MC instruction analyzer.
+ for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget()})
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+ for (Target *T : {&getTheThumbLETarget(), &getTheThumbBETarget()})
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createThumbMCInstrAnalysis);
+
// Register the MC Code Emitter
for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 482bcf9..5516a1b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachORelocationInfo.cpp ----------------------------------------===//
+//===- ARMMachORelocationInfo.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm-c/Disassembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
+#include "llvm/MC/MCExpr.h"
using namespace llvm;
-using namespace object;
namespace {
+
class ARMMachORelocationInfo : public MCRelocationInfo {
public:
ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
@@ -35,7 +35,8 @@ public:
}
}
};
-} // End unnamed namespace
+
+} // end anonymous namespace
/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo.
MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b77181f..4a8139d 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
using namespace llvm;
namespace {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index e49f1a3..4a94318 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -10,20 +10,25 @@
// This file implements the ARMTargetStreamer class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
+
+#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
+
//
// ARMTargetStreamer Implementation
//
+
ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
-ARMTargetStreamer::~ARMTargetStreamer() {}
+ARMTargetStreamer::~ARMTargetStreamer() = default;
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
@@ -74,5 +79,180 @@ void ARMTargetStreamer::finishAttributeSection() {}
void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
-
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
+
+static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
+ if (STI.getCPU() == "xscale")
+ return ARMBuildAttrs::v5TEJ;
+
+ if (STI.hasFeature(ARM::HasV8Ops)) {
+ if (STI.hasFeature(ARM::FeatureRClass))
+ return ARMBuildAttrs::v8_R;
+ return ARMBuildAttrs::v8_A;
+ } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+ return ARMBuildAttrs::v8_M_Main;
+ else if (STI.hasFeature(ARM::HasV7Ops)) {
+ if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
+ } else if (STI.hasFeature(ARM::HasV6T2Ops))
+ return ARMBuildAttrs::v6T2;
+ else if (STI.hasFeature(ARM::HasV8MBaselineOps))
+ return ARMBuildAttrs::v8_M_Base;
+ else if (STI.hasFeature(ARM::HasV6MOps))
+ return ARMBuildAttrs::v6S_M;
+ else if (STI.hasFeature(ARM::HasV6Ops))
+ return ARMBuildAttrs::v6;
+ else if (STI.hasFeature(ARM::HasV5TEOps))
+ return ARMBuildAttrs::v5TE;
+ else if (STI.hasFeature(ARM::HasV5TOps))
+ return ARMBuildAttrs::v5T;
+ else if (STI.hasFeature(ARM::HasV4TOps))
+ return ARMBuildAttrs::v4T;
+ else
+ return ARMBuildAttrs::v4;
+}
+
+static bool isV8M(const MCSubtargetInfo &STI) {
+ // Note that v8M Baseline is a subset of v6T2!
+ return (STI.hasFeature(ARM::HasV8MBaselineOps) &&
+ !STI.hasFeature(ARM::HasV6T2Ops)) ||
+ STI.hasFeature(ARM::HasV8MMainlineOps);
+}
+
+/// Emit the build attributes that only depend on the hardware that we expect
+/// to be available, and not on the ABI, or any source-language choices.
+void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+ switchVendor("aeabi");
+
+ const StringRef CPUString = STI.getCPU();
+ if (!CPUString.empty() && !CPUString.startswith("generic")) {
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.hasFeature(ARM::ProcKrait)) {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We consider krait as a "cortex-a9" + hwdiv CPU
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
+ STI.hasFeature(ARM::FeatureHWDivARM))
+ emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
+ } else {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
+ }
+
+ emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));
+
+ if (STI.hasFeature(ARM::FeatureAClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (STI.hasFeature(ARM::FeatureRClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (STI.hasFeature(ARM::FeatureMClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
+ }
+
+ emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
+ ? ARMBuildAttrs::Not_Allowed
+ : ARMBuildAttrs::Allowed);
+
+ if (isV8M(STI)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumbDerived);
+ } else if (STI.hasFeature(ARM::FeatureThumb2)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (STI.hasFeature(ARM::HasV4TOps)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ }
+
+ if (STI.hasFeature(ARM::FeatureNEON)) {
+    /* NEON is not exactly a VFP architecture, but GAS emits one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (STI.hasFeature(ARM::FeatureFPARMv8)) {
+ if (STI.hasFeature(ARM::FeatureCrypto))
+ emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
+ else
+ emitFPU(ARM::FK_NEON_FP_ARMV8);
+ } else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(ARM::FK_NEON_VFPV4);
+ else
+ emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
+ : ARM::FK_NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ STI.hasFeature(ARM::HasV8_1aOps)
+ ? ARMBuildAttrs::AllowNeonARMv8_1a
+ : ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (STI.hasFeature(ARM::FeatureFPARMv8))
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
+ : ARM::FK_FPV5_D16)
+ : ARM::FK_FP_ARMV8);
+ else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
+ : ARM::FK_VFPV4_D16)
+ : ARM::FK_VFPV4);
+ else if (STI.hasFeature(ARM::FeatureVFP3))
+ emitFPU(
+ STI.hasFeature(ARM::FeatureD16)
+ // +d16
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
+ ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+ : ARM::FK_VFPV3XD)
+ : (STI.hasFeature(ARM::FeatureFP16)
+ ? ARM::FK_VFPV3_D16_FP16
+ : ARM::FK_VFPV3_D16))
+ // -d16
+ : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+ : ARM::FK_VFPV3));
+ else if (STI.hasFeature(ARM::FeatureVFP2))
+ emitFPU(ARM::FK_VFPV2);
+ }
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+ emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
+
+ if (STI.hasFeature(ARM::FeatureFP16))
+ emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (STI.hasFeature(ARM::FeatureMP))
+ emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
+ // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
+ // It is not possible to produce DisallowDIV: if hwdiv is present in the base
+ // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
+ // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
+ // otherwise, the default value (AllowDIVIfExists) applies.
+ if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+
+ if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
+ emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureStrictAlign))
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+ else
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureTrustZone) &&
+ STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasFeature(ARM::FeatureTrustZone))
+ emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
+ else if (STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 173cc93..d3ab83b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -14,12 +14,14 @@
#include "ARMUnwindOpAsm.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
using namespace llvm;
namespace {
+
/// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes
/// with MSB to LSB per uint32_t ordering. For example, the first byte will
/// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0,
@@ -27,20 +29,19 @@ namespace {
class UnwindOpcodeStreamer {
private:
SmallVectorImpl<uint8_t> &Vec;
- size_t Pos;
+ size_t Pos = 3;
public:
- UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) {
- }
+ UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V) {}
/// Emit the byte in MSB to LSB per uint32_t order.
- inline void EmitByte(uint8_t elem) {
+ void EmitByte(uint8_t elem) {
Vec[Pos] = elem;
Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u);
}
/// Emit the size prefix.
- inline void EmitSize(size_t Size) {
+ void EmitSize(size_t Size) {
size_t SizeInWords = (Size + 3) / 4;
assert(SizeInWords <= 0x100u &&
"Only 256 additional words are allowed for unwind opcodes");
@@ -48,19 +49,20 @@ namespace {
}
/// Emit the personality index prefix.
- inline void EmitPersonalityIndex(unsigned PI) {
+ void EmitPersonalityIndex(unsigned PI) {
assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX &&
"Invalid personality prefix");
EmitByte(ARM::EHABI::EHT_COMPACT | PI);
}
/// Fill the rest of bytes with FINISH opcode.
- inline void FillFinishOpcode() {
+ void FillFinishOpcode() {
while (Pos < Vec.size())
EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
}
};
-}
+
+} // end anonymous namespace
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
@@ -153,7 +155,6 @@ void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex,
SmallVectorImpl<uint8_t> &Result) {
-
UnwindOpcodeStreamer OpStreamer(Result);
if (HasPersonality) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index e0c113e..a7bfbdf 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/DataTypes.h"
+#include <cstddef>
+#include <cstdint>
namespace llvm {
@@ -25,13 +25,12 @@ class MCSymbol;
class UnwindOpcodeAssembler {
private:
- llvm::SmallVector<uint8_t, 32> Ops;
- llvm::SmallVector<unsigned, 8> OpBegins;
- bool HasPersonality;
+ SmallVector<uint8_t, 32> Ops;
+ SmallVector<unsigned, 8> OpBegins;
+ bool HasPersonality = false;
public:
- UnwindOpcodeAssembler()
- : HasPersonality(0) {
+ UnwindOpcodeAssembler() {
OpBegins.push_back(0);
}
@@ -40,12 +39,12 @@ public:
Ops.clear();
OpBegins.clear();
OpBegins.push_back(0);
- HasPersonality = 0;
+ HasPersonality = false;
}
/// Set the personality
void setPersonality(const MCSymbol *Per) {
- HasPersonality = 1;
+ HasPersonality = true;
}
/// Emit unwind opcodes for .save directives
@@ -88,6 +87,6 @@ private:
}
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 166c04b..f74fb2e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -9,33 +9,41 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
namespace {
+
class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
ARMWinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
assert(!Is64Bit && "AArch64 support not yet implemented");
}
- ~ARMWinCOFFObjectWriter() override {}
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection,
+ ~ARMWinCOFFObjectWriter() override = default;
+
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
const MCAsmBackend &MAB) const override;
bool recordRelocation(const MCFixup &) const override;
};
-unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
+} // end anonymous namespace
+
+unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
const MCAsmBackend &MAB) const {
@@ -79,13 +87,13 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
}
-}
namespace llvm {
+
MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
}
-}
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 9953c61..5709b4e 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -12,13 +12,35 @@
//===----------------------------------------------------------------------===//
#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb1InstrInfo.h"
+#include "ThumbRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <vector>
using namespace llvm;
@@ -61,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignTo(Amount, getStackAlignment());
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old.getOpcode();
@@ -215,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R12:
if (STI.splitFramePushPop(MF))
break;
- // fallthough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
@@ -238,9 +259,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
FramePtrOffsetInBlock +=
MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
- .setMIFlags(MachineInstr::FrameSetup));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP)
+ .addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
if(FramePtrOffsetInBlock) {
CFAOffset += FramePtrOffsetInBlock;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
@@ -336,14 +359,19 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF))
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
- .addReg(ARM::SP));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
+
+ // In some cases, virtual registers have been introduced, e.g. by uses of
+ // emitThumbRegPlusImmInReg.
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
@@ -408,13 +436,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(ARM::R4));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
} else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(FramePtr));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL));
} else {
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
@@ -493,12 +521,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
return true;
MachineInstrBuilder MIB =
- AddDefaultPred(
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
+ .add(predOps(ARMCC::AL));
// Copy implicit ops and popped registers, if any.
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB.addReg(ARM::PC, RegState::Define);
// Erase the old instruction (tBX_RET or tPOP).
MBB.erase(MBBI);
@@ -507,14 +535,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Look for a temporary register to use.
// First, compute the liveness information.
- LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ LivePhysRegs UsedRegs(TRI);
UsedRegs.addLiveOuts(MBB);
// The semantic of pristines changed recently and now,
// the callee-saved registers that are touched in the function
// are not part of the pristines set anymore.
// Add those callee-saved now.
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i)
UsedRegs.addReg(CSRegs[i]);
@@ -533,18 +561,17 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// And some temporary register, just in case.
unsigned TemporaryReg = 0;
BitVector PopFriendly =
- TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
// Rebuild the GPRs from the high registers because they are removed
// form the GPR reg class for thumb1.
BitVector GPRsNoLRSP =
- TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID));
GPRsNoLRSP |= PopFriendly;
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
GPRsNoLRSP.reset(ARM::PC);
- for (int Register = GPRsNoLRSP.find_first(); Register != -1;
- Register = GPRsNoLRSP.find_next(Register)) {
+ for (unsigned Register : GPRsNoLRSP.set_bits()) {
if (!UsedRegs.contains(Register)) {
// Remember the first pop-friendly register and exit.
if (PopFriendly.test(Register)) {
@@ -566,22 +593,23 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
if (TemporaryReg) {
assert(!PopReg && "Unnecessary MOV is about to be inserted");
PopReg = PopFriendly.find_first();
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(TemporaryReg, RegState::Define)
- .addReg(PopReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
// We couldn't use the direct restoration above, so
// perform the opposite conversion: tPOP_RET to tPOP.
MachineInstrBuilder MIB =
- AddDefaultPred(
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL));
bool Popped = false;
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
MO.getReg() != ARM::PC) {
- MIB.addOperand(MO);
+ MIB.add(MO);
if (!MO.isImplicit())
Popped = true;
}
@@ -590,23 +618,27 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
MBB.erase(MIB.getInstr());
// Erase the old instruction.
MBB.erase(MBBI);
- MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
+ .add(predOps(ARMCC::AL));
}
assert(PopReg && "Do not know how to get LR");
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
.addReg(PopReg, RegState::Define);
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(PopReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
if (TemporaryReg)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(PopReg, RegState::Define)
- .addReg(TemporaryReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
return true;
}
@@ -666,13 +698,14 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CopyRegs.insert(ArgReg);
// Push the low registers and lr
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!LoRegsToSave.empty()) {
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
if (LoRegsToSave.count(Reg)) {
- bool isKill = !MF.getRegInfo().isLiveIn(Reg);
- if (isKill)
+ bool isKill = !MRI.isLiveIn(Reg);
+ if (isKill && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
MIB.addReg(Reg, getKillRegState(isKill));
@@ -708,22 +741,21 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
- MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(PushMIB);
+ MachineInstrBuilder PushMIB =
+ BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
SmallVector<unsigned, 4> RegsToPush;
while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
if (HiRegsToSave.count(*HiRegToSave)) {
- bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
- if (isKill)
+ bool isKill = !MRI.isLiveIn(*HiRegToSave);
+ if (isKill && !MRI.isReserved(*HiRegToSave))
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
- MIB.addReg(*CopyReg, RegState::Define);
- MIB.addReg(*HiRegToSave, getKillRegState(isKill));
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(*CopyReg, RegState::Define)
+ .addReg(*HiRegToSave, getKillRegState(isKill))
+ .add(predOps(ARMCC::AL));
// Record the register that must be added to the PUSH.
RegsToPush.push_back(*CopyReg);
@@ -735,7 +767,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
}
// Add the low registers to the PUSH, in ascending order.
- for (unsigned Reg : reverse(RegsToPush))
+ for (unsigned Reg : llvm::reverse(RegsToPush))
PushMIB.addReg(Reg, RegState::Kill);
// Insert the PUSH instruction after the MOVs.
@@ -817,19 +849,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the POP instruction.
- MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
- AddDefaultPred(PopMIB);
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
// Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define);
// Create the MOV from low to high register.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
- MIB.addReg(*HiRegToRestore, RegState::Define);
- MIB.addReg(*CopyReg, RegState::Kill);
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(*HiRegToRestore, RegState::Define)
+ .addReg(*CopyReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
HiRegToRestore =
@@ -837,11 +868,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
}
-
-
-
- MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -859,6 +887,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// ARMv4T requires BX, see emitEpilogue
if (!STI.hasV5TOps())
continue;
+ // Tailcall optimization failed; change TCRETURN to a tBL
+ if (MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri) {
+ unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi
+ ? ARM::tBL : ARM::tBLXr;
+ MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode));
+ BL.add(predOps(ARMCC::AL));
+ BL.add(MI->getOperand(0));
+ MBB.insert(MI, &*BL);
+ }
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
if (MI != MBB.end())
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b4fbaa..3a3920a 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -24,8 +24,8 @@ using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI() {}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(ARM::tMOVr);
NopInst.addOperand(MCOperand::createReg(ARM::R8));
NopInst.addOperand(MCOperand::createReg(ARM::R8));
@@ -50,20 +50,29 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
|| !ARM::tGPRRegClass.contains(DestReg))
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
else {
- // FIXME: The performance consequences of this are going to be atrocious.
- // Some things to try that should be better:
- // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
- // * 'movs $dst, $src' if cpsr isn't live
- // See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html
+ // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
+ // with hi as either r10 or r11.
+
+ const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
+ if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
+ == MachineBasicBlock::LQR_Dead) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ ->addRegisterDead(ARM::CPSR, RegInfo);
+ return;
+ }
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
- .addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
- .addReg(DestReg, getDefRegState(true));
+ BuildMI(MBB, I, DL, get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(DestReg, getDefRegState(true));
}
}
@@ -87,9 +96,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
}
@@ -113,8 +125,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 931914a..e8d9a9c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -25,8 +25,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
- /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ /// Return the noop instruction to use for a noop.
+ void getNoop(MCInst &NopInst) const override;
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d01fc8c..04bdd91 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -9,13 +9,26 @@
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <cassert>
+#include <new>
+
using namespace llvm;
#define DEBUG_TYPE "thumb2-it"
@@ -24,16 +37,18 @@ STATISTIC(NumITs, "Number of IT blocks inserted");
STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
+
class Thumb2ITBlockPass : public MachineFunctionPass {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
bool restrictIT;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -52,8 +67,10 @@ namespace {
SmallSet<unsigned, 4> &Uses);
bool InsertITInstructions(MachineBasicBlock &MBB);
};
+
char Thumb2ITBlockPass::ID = 0;
-}
+
+} // end anonymous namespace
/// TrackDefUses - Tracking what registers are being defined and used by
/// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1c731d6..9125be9 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,16 +11,26 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb2InstrInfo.h"
-#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
using namespace llvm;
@@ -30,10 +40,10 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI() {}
+ : ARMBaseInstrInfo(STI) {}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(ARM::tHINT);
NopInst.addOperand(MCOperand::createImm(0));
NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
@@ -117,8 +127,9 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
}
void Thumb2InstrInfo::
@@ -138,9 +149,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
return;
}
@@ -156,8 +170,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
- MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
return;
}
@@ -180,8 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
return;
}
@@ -198,8 +214,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
- MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
@@ -259,10 +274,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (Fits) {
if (isSub) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
- .addReg(BaseReg)
- .addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
} else {
// Here we know that DestReg is not SP but we do not
// know anything about BaseReg. t2ADDrr is an invalid
@@ -270,10 +286,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// is fine if SP is the first argument. To be sure we
// do not generate invalid encoding, put BaseReg first.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
- .addReg(BaseReg)
- .addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
}
return;
}
@@ -284,8 +301,10 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned Opc = 0;
if (DestReg == ARM::SP && BaseReg != ARM::SP) {
// mov sp, rn. Note t2MOVr cannot be used.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
- .addReg(BaseReg).setMIFlags(MIFlags));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
BaseReg = ARM::SP;
continue;
}
@@ -296,8 +315,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg)
+ .addImm(ThisVal / 4)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
NumBytes = 0;
continue;
}
@@ -334,12 +356,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
}
// Build the new ADD / SUB.
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill)
- .addImm(ThisVal)).setMIFlags(MIFlags);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
if (HasCCOut)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
BaseReg = DestReg;
}
@@ -474,7 +497,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
return true;
}
@@ -526,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Add cc_out operand if the original instruction did not have one.
if (!HasCCOut)
MI.addOperand(MachineOperand::CreateReg(0, false));
-
} else {
-
// AddrMode4 and AddrMode6 cannot handle any offset.
if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
return false;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 15d6330..c834ba7 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -26,8 +26,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
- /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ /// Return the noop instruction to use for a noop.
+ void getNoop(MCInst &NopInst) const override;
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8208e7e..d911dd9 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -10,20 +10,38 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "t2-reduce-size"
@@ -40,6 +58,7 @@ static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
cl::init(-1), cl::Hidden);
namespace {
+
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
@@ -139,11 +158,12 @@ namespace {
class Thumb2SizeReduce : public MachineFunctionPass {
public:
static char ID;
- Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
const Thumb2InstrInfo *TII;
const ARMSubtarget *STI;
+ Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
+
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -201,19 +221,21 @@ namespace {
struct MBBInfo {
// The flags leaving this block have high latency.
- bool HighLatencyCPSR;
+ bool HighLatencyCPSR = false;
// Has this block been visited yet?
- bool Visited;
+ bool Visited = false;
- MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ MBBInfo() = default;
};
SmallVector<MBBInfo, 8> BlockInfo;
std::function<bool(const Function &)> PredicateFtor;
};
+
char Thumb2SizeReduce::ID = 0;
-}
+
+} // end anonymous namespace
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
: MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
@@ -490,14 +512,13 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
- case ARM::t2STMIA: {
+ case ARM::t2STMIA:
// If the base register is killed, we don't care what its value is after the
// instruction, so we can use an updating STMIA.
if (!MI->getOperand(0).isKill())
return false;
break;
- }
case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
@@ -562,8 +583,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
if (!isLdStMul) {
- MIB.addOperand(MI->getOperand(0));
- MIB.addOperand(MI->getOperand(1));
+ MIB.add(MI->getOperand(0));
+ MIB.add(MI->getOperand(1));
if (HasImmOffset)
MIB.addImm(OffsetImm / Scale);
@@ -577,7 +598,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer the rest of operands.
for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.addOperand(MI->getOperand(OpNum));
+ MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -621,12 +642,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::tADDrSPi))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
+ .add(predOps(ARMCC::AL));
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -652,11 +674,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
switch (Opc) {
default: break;
- case ARM::t2ADDSri: {
+ case ARM::t2ADDSri:
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
LLVM_FALLTHROUGH;
- }
case ARM::t2ADDSrr:
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
@@ -698,7 +719,6 @@ bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR, bool IsSelfLoop) {
-
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
@@ -785,13 +805,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.addOperand(MI->getOperand(0));
- if (NewMCID.hasOptionalDef()) {
- if (HasCC)
- AddDefaultT1CC(MIB, CCDead);
- else
- AddNoT1CC(MIB);
- }
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
@@ -800,7 +816,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
continue;
if (SkipPred && MCID.OpInfo[i].isPredicate())
continue;
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
}
// Transfer MI flags.
@@ -880,13 +896,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.addOperand(MI->getOperand(0));
- if (NewMCID.hasOptionalDef()) {
- if (HasCC)
- AddDefaultT1CC(MIB, CCDead);
- else
- AddNoT1CC(MIB);
- }
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
@@ -909,10 +921,10 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Skip implicit def of CPSR. Either it's modeled as an optional
// def now or it's already an implicit def on the new instruction.
continue;
- MIB.addOperand(MO);
+ MIB.add(MO);
}
if (!MCID.isPredicable() && NewMCID.isPredicable())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 2efd63b..15a5675 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -93,9 +93,10 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -145,14 +146,17 @@ static void emitThumbRegPlusImmInReg(
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)
+ .add(t1CondCodeOp())
.addImm(NumBytes)
.setMIFlags(MIFlags);
} else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)
+ .add(t1CondCodeOp())
.addImm(NumBytes)
.setMIFlags(MIFlags);
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)
+ .add(t1CondCodeOp())
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
} else if (ST.genExecuteOnly()) {
@@ -167,12 +171,12 @@ static void emitThumbRegPlusImmInReg(
: ((isHigh || !CanChangeCC) ? ARM::tADDhirr : ARM::tADDrr);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (Opc != ARM::tADDhirr)
- MIB = AddDefaultT1CC(MIB);
+ MIB = MIB.add(t1CondCodeOp());
if (DestReg == ARM::SP || isSub)
MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
else
MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
@@ -307,12 +311,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
if (CopyNeedsCC)
- MIB = AddDefaultT1CC(MIB);
+ MIB = MIB.add(t1CondCodeOp());
MIB.addReg(BaseReg, RegState::Kill);
if (CopyOpc != ARM::tMOVr) {
MIB.addImm(CopyImm);
}
- AddDefaultPred(MIB.setMIFlags(MIFlags));
+ MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL));
BaseReg = DestReg;
}
@@ -324,10 +328,11 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
if (ExtraNeedsCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(BaseReg).addImm(ExtraImm);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
+ MIB = MIB.add(t1CondCodeOp());
+ MIB.addReg(BaseReg)
+ .addImm(ExtraImm)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
}
}
@@ -460,9 +465,10 @@ bool ThumbRegisterInfo::saveScavengerRegister(
// a call clobbered register that we know won't be used in Thumb1 mode.
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL;
- AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(Reg, RegState::Kill));
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
// The UseMI is where we would like to restore the register. If there's
// interference with R12 before then, however, we'll need to restore it
@@ -490,8 +496,10 @@ bool ThumbRegisterInfo::saveScavengerRegister(
}
}
// Restore the register from R12
- AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr))
+ .addReg(Reg, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill)
+ .add(predOps(ARMCC::AL));
return true;
}
@@ -621,5 +629,5 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Add predicate back if it's needed.
if (MI.isPredicable())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
OpenPOWER on IntegriCloud