Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
89 files changed, 10433 insertions, 4929 deletions
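Note: two mechanical changes account for most of the churn below. First, predicated MachineInstr construction moves off the AddDefaultPred()/AddDefaultCC() wrapper helpers and onto the builder chain itself, via .add(predOps(...)) and .add(condCodeOp()) (and .addOperand() becomes .add()). Second, the TableGen feature FeatureHWDiv is renamed FeatureHWDivThumb, with its backing field HasHardwareDivide becoming HasHardwareDivideInThumb. The sketch below is not part of the patch; it contrasts the old and new builder idioms for an unpredicated ARM::MOVr, mirroring the copyPhysReg change further down. The wrapper function and its parameters are hypothetical scaffolding, and the snippet assumes the LLVM backend headers named in the diff, so it only compiles inside the ARM target.

// Hypothetical helper contrasting the two builder styles; only the
// .add(predOps(...)) / .add(condCodeOp()) calls are taken from the patch.
#include "ARMBaseInstrInfo.h"                 // declares predOps(), condCodeOp()
#include "llvm/CodeGen/MachineInstrBuilder.h" // BuildMI()

using namespace llvm;

static void emitCopy(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertBefore,
                     const DebugLoc &DL, const TargetInstrInfo *TII,
                     unsigned DestReg, unsigned SrcReg) {
  // Old idiom (removed throughout this patch): wrap the finished builder
  // in helpers that append the predicate and optional CPSR-def operands
  // after the fact, which reads inside-out.
  //   AddDefaultCC(AddDefaultPred(
  //       BuildMI(MBB, InsertBefore, DL, TII->get(ARM::MOVr), DestReg)
  //           .addReg(SrcReg)));

  // New idiom: one left-to-right chain. predOps(ARMCC::AL) appends the
  // "always execute" predicate pair (condition code + predicate register);
  // condCodeOp() appends a zero condition-code register operand, meaning
  // the instruction does not set CPSR.
  BuildMI(MBB, InsertBefore, DL, TII->get(ARM::MOVr), DestReg)
      .addReg(SrcReg)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
}

The payoff of the new style is visible in hunks like createDupLane and storeRegToStackSlot below: every operand, including the predicate, appears in source order on a single chain, so a predicated variant differs only in the ARMCC condition passed to predOps().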
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp index 89859ba..8640c87 100644 --- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -427,13 +427,11 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, unsigned Lane, bool QPR) { unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass); - AddDefaultPred(BuildMI(MBB, - InsertBefore, - DL, - TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), - Out) - .addReg(Reg) - .addImm(Lane)); + BuildMI(MBB, InsertBefore, DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out) + .addReg(Reg) + .addImm(Lane) + .add(predOps(ARMCC::AL)); return Out; } @@ -476,13 +474,11 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1) { unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); - AddDefaultPred(BuildMI(MBB, - InsertBefore, - DL, - TII->get(ARM::VEXTd32), Out) - .addReg(Ssub0) - .addReg(Ssub1) - .addImm(1)); + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1) + .add(predOps(ARMCC::AL)); return Out; } diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index be30482..4676226 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -16,21 +16,24 @@ #define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" -#include "ARMBasicBlockInfo.h" #include <functional> +#include <vector> namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; +class ARMRegisterBankInfo; +class ARMSubtarget; +struct BasicBlockInfo; class Function; class FunctionPass; -class ImmutablePass; +class InstructionSelector; +class MachineBasicBlock; +class MachineFunction; class MachineInstr; class MCInst; class PassRegistry; -class TargetLowering; -class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -43,6 +46,9 @@ FunctionPass *createThumb2ITBlockPass(); FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass( std::function<bool(const Function &)> Ftor = nullptr); +InstructionSelector * +createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); @@ -53,7 +59,8 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF); void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); +void initializeARMConstantIslandsPass(PassRegistry &); -} // end namespace llvm; +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARM_H diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 2a090fa..e49c1ba 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -17,144 +17,172 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// ARM Helper classes. +// ARM Subtarget state. 
// -class ProcNoItin<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", + "true", "Thumb mode">; + +def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", + "true", "Use software floating " + "point features.">; -class Architecture<string fname, string aname, list<SubtargetFeature> features > - : SubtargetFeature<fname, "ARMArch", aname, - !strconcat(aname, " architecture"), features>; //===----------------------------------------------------------------------===// -// ARM Subtarget state. +// ARM Subtarget features. // -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", - "Thumb mode">; +// Floating Point, HW Division and Neon Support +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions">; -def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", - "Use software floating point features.">; +def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", + "Enable VFP3 instructions", + [FeatureVFP2]>; -//===----------------------------------------------------------------------===// -// ARM Subtarget features. -// +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable NEON instructions", + [FeatureVFP3]>; + +def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", + "Enable half-precision " + "floating point">; + +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision " + "floating point", + [FeatureFPARMv8]>; + +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports " + "single precision only">; + +def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", + "Restrict FP to 16 double registers">; + +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", + "HasHardwareDivideInThumb", "true", + "Enable divide instructions in Thumb">; + +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; + +// Atomic Support +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb/dsb) instructions">; + +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions">; -def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", - "Enable VFP3 instructions", - [FeatureVFP2]>; -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable NEON instructions", - [FeatureVFP3]>; -def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", - "Enable Thumb2 instructions">; -def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution", - [ModeThumb]>; -def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", - "Enable half-precision floating point">; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3, FeatureFP16]>; -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", - "true", "Enable ARMv8 FP", - [FeatureVFP4]>; -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Enable 
full half-precision floating point", - [FeatureFPARMv8]>; -def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict FP to 16 double registers">; -def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", - "Enable divide instructions">; -def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", - "Enable divide instructions in ARM mode">; -def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", - "Enable Thumb2 extract and pack instructions">; -def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", - "Has data barrier (dmb / dsb) instructions">; -def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", - "Has v7 clrex instruction">; def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", - "Has v8 acquire/release (lda/ldaex etc) instructions">; -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", - "FP compare + branch is slow">; -def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", - "Floating point unit supports single precision only">; -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable support for Performance Monitor extensions">; -def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", - "Enable support for TrustZone security extensions">; -def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", - "Enable support for ARMv8-M Security Extensions">; -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable support for Cryptography extensions", - [FeatureNEON]>; -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable support for CRC instructions">; + "Has v8 acquire/release (lda/ldaex " + " etc) instructions">; + + +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance " + "Monitor extensions">; + + +// TrustZone Security Extensions +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone " + "security extensions">; + +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M " + "Security Extensions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for " + "Cryptography extensions", + [FeatureNEON]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; + + // Not to be confused with FeatureHasRetAddrStack (return address stack) -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable Reliability, Availability and Serviceability extensions">; -def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", - "Enable fast computation of positive address offsets">; +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability " + "and Serviceability extensions">; +// Fast computation of non-negative address offsets +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of " + "positive address offsets">; -// Cyclone has preferred instructions for zeroing VFP registers, which can -// execute in 0 cycles. 
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; +// Fast execution of AES crypto operations +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; -// Whether or not it may be profitable to unpredicate certain instructions -// during if conversion. +// Cyclone can zero VFP registers in 0 cycles. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +// Whether it is profitable to unpredicate certain instructions during if-conversion def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", - "IsProfitableToUnpredicate", - "true", + "IsProfitableToUnpredicate", "true", "Is profitable to unpredicate">; // Some targets (e.g. Swift) have microcoded VGETLNi32. -def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", - "HasSlowVGETLNi32", "true", - "Has slow VGETLNi32 - prefer VMOV">; +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; // Some targets (e.g. Swift) have microcoded VDUP32. -def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", - "Has slow VDUP32 - prefer VMOV">; +def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", + "true", + "Has slow VDUP32 - prefer VMOV">; // Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON // for scalar FP, as this allows more effective execution domain optimization. -def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", - "true", "Prefer VMOVSR">; +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; // Swift has ISHST barriers compatible with Atomic Release semantics but weaker // than ISH def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", - "true", "Prefer ISHST barriers">; + "true", "Prefer ISHST barriers">; // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. -def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", - "Has muxed AGU and NEON/FPU">; +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", + "true", + "Has muxed AGU and NEON/FPU">; -// On some targets, a VLDM/VSTM starting with an odd register number needs more -// microops than single VLDRS. +// Whether VLDM/VSTM starting with odd register number need more microops +// than single VLDRS def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", - "true", "VLDM/VSTM starting with an odd register is slow">; + "true", "VLDM/VSTM starting " + "with an odd register is slow">; // Some targets have a renaming dependency when loading into D subregisters. def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", "SlowLoadDSubregister", "true", "Loading into D subregs is slow">; + // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", "DontWidenVMOVS", "true", "Don't widen VMOVS to VMOVD">; // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", - "Expand VFP/NEON MLA/MLS instructions">; +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", + "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. 
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", @@ -162,15 +190,18 @@ def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. -def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", - "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">; +def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", + "UseNEONForFPMovs", "true", + "Convert VMOVSR, VMOVRS, " + "VMOVS to NEON">; // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. This affects instruction selection and should // only be enabled if the handling of denormals is not important. -def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", - "true", - "Use NEON for single precision FP">; +def FeatureNEONForFP : SubtargetFeature<"neonfp", + "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; // On some processors, VLDn instructions that access unaligned data take one // extra cycle. Take that into account when computing operand latencies. @@ -181,18 +212,18 @@ def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", // Some processors have a nonpipelined VFP coprocessor. def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", "NonpipelinedVFP", "true", - "VFP instructions are not pipelined">; + "VFP instructions are not pipelined">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. -def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", - "Disable VFP / NEON MAC instructions">; +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", - "HasVMLxForwarding", "true", - "Has multiplier accumulator forwarding">; + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; // Disable 32-bit to 16-bit narrowing for experimentation. def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", @@ -206,62 +237,105 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; -def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", - "AvoidMOVsShifterOperand", "true", - "Avoid movs instructions with shifter operand">; +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with " + "shifter operand">; // Some processors perform return stack prediction. CodeGen should avoid issue // "normal" call instructions to callees which do not return. 
-def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true", - "Has return address stack">; +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", + "HasRetAddrStack", "true", + "Has return address stack">; + +// Some processors have no branch predictor, which changes the expected cost of +// taking a branch which affects the choice of whether to use predicated +// instructions. +def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", + "HasBranchPredictor", "false", + "Has no branch predictor">; /// DSP extension. -def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", - "Supports DSP instructions in ARM and/or Thumb2">; +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in " + "ARM and/or Thumb2">; // Multiprocessing extension. -def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", - "Supports Multiprocessing extension">; +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; // Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). def FeatureVirtualization : SubtargetFeature<"virtualization", - "HasVirtualization", "true", - "Supports Virtualization extension", - [FeatureHWDiv, FeatureHWDivARM]>; + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDivThumb, FeatureHWDivARM]>; -// M-series ISA -def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", - "Is microcontroller profile ('M' series)">; +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; -// R-series ISA -def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", - "Is realtime profile ('R' series)">; +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + +def FeatureExecuteOnly : SubtargetFeature<"execute-only", + "GenExecuteOnly", "true", + "Enable the generation of " + "execute only code.">; + +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; + +def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", + "Don't use movt/movw pairs for " + "32-bit imms">; + +def FeatureNoNegativeImmediates + : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + + +//===----------------------------------------------------------------------===// +// ARM architecture class +// // A-series ISA def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", "Is application profile ('A' series)">; -// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. -// See ARMInstrInfo.td for details. 
-def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", - "NaCl trap">; +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", + "Is microcontroller profile ('M' series)">; -def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", - "Generate calls via indirect call " - "instructions">; -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable as " - "GPR">; +def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", + "Enable Thumb2 instructions">; -def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", - "Don't use movt/movw pairs for 32-bit " - "imms">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution", + [ModeThumb]>; //===----------------------------------------------------------------------===// @@ -270,44 +344,57 @@ def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; + def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", "Support ARM v5T instructions", [HasV4TOps]>; + def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", - "Support ARM v5TE, v5TEj, and v5TExp instructions", + "Support ARM v5TE, v5TEj, and " + "v5TExp instructions", [HasV5TOps]>; + def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", "Support ARM v6 instructions", [HasV5TEOps]>; + def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", "Support ARM v6M instructions", [HasV6Ops]>; + def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", "Support ARM v8M Baseline instructions", [HasV6MOps]>; + def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", "Support ARM v6k instructions", [HasV6Ops]>; + def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; + def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon, FeatureV7Clrex]>; + +def HasV8MMainlineOps : + SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; + def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", - [HasV7Ops, FeatureAcquireRelease, - FeatureT2XtPk]>; + [HasV7Ops, FeatureAcquireRelease]>; + def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops]>; -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", "Support ARM v8.2a instructions", [HasV8_1aOps]>; -def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", - "Support ARM v8M Mainline instructions", - [HasV7Ops]>; //===----------------------------------------------------------------------===// @@ -342,7 +429,9 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", []>; def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm ARM processors", []>; + "Qualcomm Krait processors", []>; +def ProcKryo : 
SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", []>; def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", []>; @@ -361,11 +450,17 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", "Cortex-M3 ARM processors", []>; + //===----------------------------------------------------------------------===// -// ARM schedules. +// ARM Helper classes. // -include "ARMSchedule.td" +class Architecture<string fname, string aname, list<SubtargetFeature> features> + : SubtargetFeature<fname, "ARMArch", aname, + !strconcat(aname, " architecture"), features>; + +class ProcNoItin<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; //===----------------------------------------------------------------------===// @@ -393,8 +488,7 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>; def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, - FeatureDSP, - FeatureT2XtPk]>; + FeatureDSP]>; def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; @@ -415,31 +509,37 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, FeatureNEON, FeatureDB, FeatureDSP, - FeatureAClass, - FeatureT2XtPk]>; + FeatureAClass]>; + +def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureAClass]>; def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, FeatureDB, FeatureDSP, - FeatureHWDiv, - FeatureRClass, - FeatureT2XtPk]>; + FeatureHWDivThumb, + FeatureRClass]>; def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass]>; def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass, - FeatureDSP, - FeatureT2XtPk]>; + FeatureDSP]>; def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, FeatureAClass, @@ -481,9 +581,6 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, FeatureDB, - FeatureHWDiv, - FeatureHWDivARM, - FeatureT2XtPk, FeatureDSP, FeatureCRC, FeatureMP, @@ -495,7 +592,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", [HasV8MBaselineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureV7Clrex, Feature8MSecExt, FeatureAcquireRelease, @@ -505,7 +602,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", [HasV8MMainlineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, Feature8MSecExt, FeatureAcquireRelease, FeatureMClass]>; @@ -520,11 +617,20 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; //===----------------------------------------------------------------------===// +// ARM schedules. +//===----------------------------------------------------------------------===// +// +include "ARMSchedule.td" + +//===----------------------------------------------------------------------===// // ARM processors // // Dummy CPU, used to target architectures -def : ProcNoItin<"generic", []>; +def : ProcessorModel<"generic", CortexA8Model, []>; + +// FIXME: Several processors below are not using their own scheduler +// model, but one of similar/previous processor. These should be fixed. 
def : ProcNoItin<"arm8", [ARMv4]>; def : ProcNoItin<"arm810", [ARMv4]>; @@ -569,6 +675,7 @@ def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, FeatureVFP2, @@ -584,7 +691,6 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, FeatureVFP2, FeatureHasSlowFPVMLx]>; -// FIXME: A5 has currently the same Schedule model as A8 def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, FeatureHasRetAddrStack, FeatureTrustZone, @@ -603,8 +709,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, FeatureVMLxForwarding, FeatureMP, FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM, FeatureVirtualization]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, @@ -630,19 +734,15 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, FeatureCheckVLDnAlign, FeatureMP]>; -// FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, FeatureHasRetAddrStack, FeatureTrustZone, FeatureVMLxForwarding, FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureVirtualization, FeatureMP]>; -// FIXME: A15 has currently the same Schedule model as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureDontWidenVMOVS, FeatureHasRetAddrStack, @@ -651,26 +751,19 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureVFP4, FeatureMP, FeatureCheckVLDnAlign, - FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureVirtualization]>; -// FIXME: A17 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, FeatureHasRetAddrStack, FeatureTrustZone, FeatureMP, FeatureVMLxForwarding, FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureVirtualization]>; -// FIXME: krait has currently the same Schedule model as A9 -// FIXME: krait has currently the same features as A9 plus VFP4 and hardware -// division features. +// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureHasRetAddrStack, FeatureMuxedUnits, @@ -679,7 +772,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureFP16, FeatureAvoidPartialCPSR, FeatureVFP4, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM]>; def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, @@ -687,7 +780,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -700,12 +793,10 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureSlowVGETLNi32, FeatureSlowVDUP32]>; -// FIXME: R4 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureAvoidPartialCPSR]>; -// FIXME: R4F has currently the same ProcessorModel as A8. 
def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureSlowFPBrcc, @@ -714,7 +805,6 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureD16, FeatureAvoidPartialCPSR]>; -// FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, FeatureHasRetAddrStack, FeatureVFP3, @@ -724,7 +814,6 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureHasRetAddrStack, FeatureVFP3, @@ -747,63 +836,80 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>; -def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>; +def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; -def : ProcNoItin<"cortex-m4", [ARMv7em, +def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, FeatureVFP4, FeatureVFPOnlySP, - FeatureD16]>; + FeatureD16, + FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, FeatureFPARMv8, FeatureD16]>; +def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, + FeatureNoMovt]>; + +def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, + FeatureDSP, + FeatureFPARMv8, + FeatureD16, + FeatureVFPOnlySP, + FeatureHasNoBranchPredictor]>; + def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, FeatureFPAO]>; -def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, - FeatureHWDiv, +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, - FeatureFPAO]>; + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; -// Cyclone is very similar to swift def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -812,19 +918,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureZCZeroing]>; def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; @@ -837,7 +949,7 @@ def : 
ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, //===----------------------------------------------------------------------===// include "ARMRegisterInfo.td" - +include "ARMRegisterBanks.td" include "ARMCallingConv.td" //===----------------------------------------------------------------------===// @@ -845,7 +957,6 @@ include "ARMCallingConv.td" //===----------------------------------------------------------------------===// include "ARMInstrInfo.td" - def ARMInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// @@ -866,7 +977,7 @@ def ARMAsmParserVariant : AsmParserVariant { } def ARM : Target { - // Pull in Instruction Info: + // Pull in Instruction Info. let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; let AssemblyParserVariants = [ARMAsmParserVariant]; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 95db35c..582153d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,6 +23,8 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -43,9 +45,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" @@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { ATS.finishAttributeSection(); } -static bool isV8M(const ARMSubtarget *Subtarget) { - // Note that v8M Baseline is a subset of v6T2! - return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) || - Subtarget->hasV8MMainlineOps(); -} - //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) { // to appear in the .ARM.attributes section in ELF. // Instead of subclassing the MCELFStreamer, we do the work here. -static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, - const ARMSubtarget *Subtarget) { - if (CPU == "xscale") - return ARMBuildAttrs::v5TEJ; - - if (Subtarget->hasV8Ops()) { - if (Subtarget->isRClass()) - return ARMBuildAttrs::v8_R; - return ARMBuildAttrs::v8_A; - } else if (Subtarget->hasV8MMainlineOps()) - return ARMBuildAttrs::v8_M_Main; - else if (Subtarget->hasV7Ops()) { - if (Subtarget->isMClass() && Subtarget->hasDSP()) - return ARMBuildAttrs::v7E_M; - return ARMBuildAttrs::v7; - } else if (Subtarget->hasV6T2Ops()) - return ARMBuildAttrs::v6T2; - else if (Subtarget->hasV8MBaselineOps()) - return ARMBuildAttrs::v8_M_Base; - else if (Subtarget->hasV6MOps()) - return ARMBuildAttrs::v6S_M; - else if (Subtarget->hasV6Ops()) - return ARMBuildAttrs::v6; - else if (Subtarget->hasV5TEOps()) - return ARMBuildAttrs::v5TE; - else if (Subtarget->hasV5TOps()) - return ARMBuildAttrs::v5T; - else if (Subtarget->hasV4TOps()) - return ARMBuildAttrs::v4T; - else - return ARMBuildAttrs::v4; -} - // Returns true if all functions have the same function attribute value. 
// It also returns true when the module has no functions. static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr, @@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() { static_cast<const ARMBaseTargetMachine &>(TM); const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian()); - const std::string &CPUString = STI.getCPUString(); - - if (!StringRef(CPUString).startswith("generic")) { - // FIXME: remove krait check when GNU tools support krait cpu - if (STI.isKrait()) { - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); - // We consider krait as a "cortex-a9" + hwdiv CPU - // Enable hwdiv through ".arch_extension idiv" - if (STI.hasDivide() || STI.hasDivideInARMMode()) - ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM); - } else - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); - } - - ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI)); - - // Tag_CPU_arch_profile must have the default value of 0 when "Architecture - // profile is not applicable (e.g. pre v7, or cross-profile code)". - if (STI.hasV7Ops() || isV8M(&STI)) { - if (STI.isAClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - } else if (STI.isRClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::RealTimeProfile); - } else if (STI.isMClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::MicroControllerProfile); - } - } - - ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, - STI.hasARMOps() ? ARMBuildAttrs::Allowed - : ARMBuildAttrs::Not_Allowed); - if (isV8M(&STI)) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumbDerived); - } else if (STI.isThumb1Only()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (STI.hasThumb2()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - } - - if (STI.hasNEON()) { - /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (STI.hasFPARMv8()) { - if (STI.hasCrypto()) - ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); - else - ATS.emitFPU(ARM::FK_NEON_FP_ARMV8); - } else if (STI.hasVFP4()) - ATS.emitFPU(ARM::FK_NEON_VFPV4); - else - ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); - // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture - if (STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); - } else { - if (STI.hasFPARMv8()) - // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one - // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16) - : ARM::FK_FP_ARMV8); - else if (STI.hasVFP4()) - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) - : ARM::FK_VFPV4); - else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() - // +d16 - ? (STI.isFPOnlySP() - ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) - : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) - // -d16 - : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); - else if (STI.hasVFP2()) - ATS.emitFPU(ARM::FK_VFPV2); - } + // Emit build attributes for the available hardware. + ATS.emitTargetAttributes(STI); // RW data addressing. 
if (isPositionIndependent()) { @@ -844,34 +724,17 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); else ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, - ARMBuildAttrs::AllowIEE754); - - if (STI.allowsUnalignedMem()) - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Allowed); - else - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Not_Allowed); + ARMBuildAttrs::AllowIEEE754); // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); - // ABI_HardFP_use attribute to indicate single precision FP. - if (STI.isFPOnlySP()) - ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use, - ARMBuildAttrs::HardFPSinglePrecision); - // Hard float. Use both S and D registers and conform to AAPCS-VFP. if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); - // FIXME: Should we signal R9 usage? - - if (STI.hasFP16()) - ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); - // FIXME: To support emitting this build attribute as GCC does, the // -mfp16-format option and associated plumbing must be // supported. For now the __fp16 type is exposed by default, so this @@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, ARMBuildAttrs::FP16FormatIEEE); - if (STI.hasMPExtension()) - ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); - - // Hardware divide in ARM mode is part of base arch, starting from ARMv8. - // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). - // It is not possible to produce DisallowDIV: if hwdiv is present in the base - // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. - // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; - // otherwise, the default value (AllowDIVIfExists) applies. - if (STI.hasDivideInARMMode() && !STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); - - if (STI.hasDSP() && isV8M(&STI)) - ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); - if (MMI) { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width @@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() { else ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR); - - if (STI.hasTrustZone() && STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZVirtualization); - else if (STI.hasTrustZone()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZ); - else if (STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowVirtualization); } //===----------------------------------------------------------------------===// @@ -1142,6 +980,11 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { const MachineOperand &MO1 = MI->getOperand(1); unsigned JTI = MO1.getIndex(); + // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for + // ARM mode tables. + EmitAlignment(2); + + // Emit a label for the jump table. 
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); @@ -1255,11 +1098,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { switch (Opc) { default: - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. StartOp = 2; NumOffset = 2; + LLVM_FALLTHROUGH; case ARM::STMDB_UPD: case ARM::t2STMDB_UPD: case ARM::VSTMDDB_UPD: @@ -1291,7 +1135,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { int64_t Offset = 0; switch (Opc) { default: - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); case ARM::MOVr: case ARM::tMOVr: @@ -1346,11 +1190,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } } } else if (DstReg == ARM::SP) { - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); } else { - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); } } @@ -1661,6 +1505,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::CONSTPOOL_ENTRY: { + if (Subtarget->genExecuteOnly()) + llvm_unreachable("execute-only should not generate constant pools"); + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool /// in the function. The first operand is the ID# for this instruction, the /// second is the index into the MachineConstantPool that this is, the third diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 70a3246..3cf5950 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -11,34 +11,58 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMFeatures.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <new> +#include <utility> +#include <vector> using namespace llvm; @@ -168,9 +192,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg) .addImm(Amt) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); } else if (Amt != 0) { ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); @@ -180,17 +203,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( .addReg(OffReg) .addReg(0) .addImm(SOOpc) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); } else UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) .addReg(BaseReg) .addReg(OffReg) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); break; } case ARMII::AddrMode3 : { @@ -202,17 +223,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg) .addImm(Amt) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); else UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) .addReg(BaseReg) .addReg(OffReg) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); break; } } @@ -306,7 +325,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Walk backwards from the end of the basic block until the branch is // analyzed or we give up. while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { - // Flag to be raised on unanalyzeable instructions. This is useful in cases // where we want to clean up on the end of the basic block before we bail // out. @@ -381,7 +399,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return false; } - unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); @@ -433,20 +450,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) - BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); else BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); } else - BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addOperand(Cond[1]); + BuildMI(&MBB, DL, get(BccOpc)) + .addMBB(TBB) + .addImm(Cond[0].getImm()) + .add(Cond[1]); return 1; } // Two-way conditional branch. 
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addOperand(Cond[1]); + BuildMI(&MBB, DL, get(BccOpc)) + .addMBB(TBB) + .addImm(Cond[0].getImm()) + .add(Cond[1]); if (isThumb) - BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); else BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; @@ -537,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate( return Found; } -static bool isCPSRDefined(const MachineInstr *MI) { - for (const auto &MO : MI->operands()) +bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { + for (const auto &MO : MI.operands()) if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) return true; return false; } +bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + return Offset.getReg() != 0; +} + +// Load with negative register offset requires additional 1cyc and +I unit +// for Cortex A57 +bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + const MachineOperand &Opc = MI.getOperand(Op + 2); + assert(Opc.isImm()); + assert(Offset.isReg()); + int64_t OpcImm = Opc.getImm(); + + bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; + return (isSub && Offset.getReg() != 0); +} + +bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + +// Load, scaled register offset, not plus LSL2 +bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + + bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); + if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled + bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); + return !SimpleScaled; +} + +// Minus reg for ldstso addr mode +bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; +} + +// Load, scaled register offset +bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + static bool isEligibleForITBlock(const MachineInstr *MI) { switch (MI->getOpcode()) { default: return true; @@ -569,14 +645,14 @@ static bool isEligibleForITBlock(const MachineInstr *MI) { case ARM::tSUBi3: // SUB (immediate) T1 case ARM::tSUBi8: // SUB (immediate) T2 case ARM::tSUBrr: // SUB (register) T1 - return !isCPSRDefined(MI); + return !ARMBaseInstrInfo::isCPSRDefined(*MI); } } /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. 
-bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const { +bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { if (!MI.isPredicable()) return false; @@ -586,22 +662,25 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const { if (!isEligibleForITBlock(&MI)) return false; - ARMFunctionInfo *AFI = + const ARMFunctionInfo *AFI = MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); + // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. + // In their ARM encoding, they can't be encoded in a conditional form. + if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; + if (AFI->isThumb2Function()) { if (getSubtarget().restrictIT()) return isV8EligibleForIT(&MI); - } else { // non-Thumb - if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) - return false; } return true; } namespace llvm { -template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { + +template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isUndef() || MO.isUse()) @@ -614,7 +693,8 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { // all definitions of CPSR are dead return true; } -} + +} // end namespace llvm /// GetInstSize - Return the size of the specified MachineInstr. /// @@ -698,9 +778,8 @@ void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, if (Subtarget.isMClass()) MIB.addImm(0x800); - AddDefaultPred(MIB); - - MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); + MIB.add(predOps(ARMCC::AL)) + .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); } void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, @@ -718,11 +797,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, else MIB.addImm(8); - MIB.addReg(SrcReg, getKillRegState(KillSrc)); - - AddDefaultPred(MIB); - - MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::CPSR, RegState::Implicit | RegState::Define); } void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -733,8 +810,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); if (GPRDest && GPRSrc) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)))); + BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); return; } @@ -758,7 +837,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MIB.addReg(SrcReg, getKillRegState(KillSrc)); if (Opc == ARM::VORRq) MIB.addReg(SrcReg, getKillRegState(KillSrc)); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); return; } @@ -845,10 +924,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // VORR takes two source operands. if (Opc == ARM::VORRq) Mov.addReg(Src); - Mov = AddDefaultPred(Mov); + Mov = Mov.add(predOps(ARMCC::AL)); // MOVr can set CC. if (Opc == ARM::MOVr) - Mov = AddDefaultCC(Mov); + Mov = Mov.add(condCodeOp()); } // Add implicit super-register defs and kills to the last instruction. 
Mov->addRegisterDefined(DestReg, TRI); @@ -883,38 +962,47 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::STRi12)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VSTRS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else llvm_unreachable("Unknown reg class!"); break; case 8: if (ARM::DPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VSTRD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { if (Subtarget.hasV5TEOps()) { MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); - MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); - - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { // Fallback to STM instruction, which has existed since the dawn of // time. - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI).addMemOperand(MMO)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); } @@ -925,15 +1013,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DPairRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(16) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VST1q64)) + .addFrameIndex(FI) + .addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } else llvm_unreachable("Unknown reg class!"); @@ -942,15 +1033,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DTripleRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. 
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) - .addFrameIndex(FI).addImm(16) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) + .addFrameIndex(FI) + .addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -963,15 +1056,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) - .addFrameIndex(FI).addImm(16) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) + .addFrameIndex(FI) + .addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -982,10 +1077,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 64: if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -1065,22 +1160,31 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else llvm_unreachable("Unknown reg class!"); break; case 8: if (ARM::DPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) - 
.addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { MachineInstrBuilder MIB; @@ -1088,14 +1192,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); - MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); - - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { // Fallback to LDM instruction, which has existed since the dawn of // time. - MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) - .addFrameIndex(FI).addMemOperand(MMO)); + MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); } @@ -1108,13 +1213,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 16: if (ARM::DPairRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(16) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + .addFrameIndex(FI) + .addImm(16) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) - .addFrameIndex(FI) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } else llvm_unreachable("Unknown reg class!"); @@ -1122,14 +1230,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) - .addFrameIndex(FI).addImm(16) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) + .addFrameIndex(FI) + .addImm(16) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) - .addFrameIndex(FI) - .addMemOperand(MMO)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); @@ -1142,14 +1252,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) - .addFrameIndex(FI).addImm(16) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) + .addFrameIndex(FI) + .addImm(16) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + 
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); @@ -1162,10 +1274,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 64: if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); @@ -1248,7 +1360,7 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD : isThumb1 ? ARM::tLDMIA_UPD : ARM::LDMIA_UPD)) - .addOperand(MI->getOperand(1)); + .add(MI->getOperand(1)); } else { LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); } @@ -1257,17 +1369,17 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD : isThumb1 ? ARM::tSTMIA_UPD : ARM::STMIA_UPD)) - .addOperand(MI->getOperand(0)); + .add(MI->getOperand(0)); } else { STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA)); } - AddDefaultPred(LDM.addOperand(MI->getOperand(3))); - AddDefaultPred(STM.addOperand(MI->getOperand(2))); + LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL)); + STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL)); // Sort the scratch registers into ascending order. const TargetRegisterInfo &TRI = getRegisterInfo(); - llvm::SmallVector<unsigned, 6> ScratchRegs; + SmallVector<unsigned, 6> ScratchRegs; for(unsigned I = 5; I < MI->getNumOperands(); ++I) ScratchRegs.push_back(MI->getOperand(I).getReg()); std::sort(ScratchRegs.begin(), ScratchRegs.end(), @@ -1285,7 +1397,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { BB->erase(MI); } - bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && @@ -1346,7 +1457,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(ARM::VMOVD)); MI.getOperand(0).setReg(DstRegD); MI.getOperand(1).setReg(SrcRegD); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // We are now reading SrcRegD instead of SrcRegS. This may upset the // register scavenger and machine verifier, so we need to indicate that we @@ -1735,39 +1846,63 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } } } - - // Attempt to estimate the relative costs of predication versus branching. - // Here we scale up each component of UnpredCost to avoid precision issue when - // scaling NumCycles by Probability. 
-  const unsigned ScalingUpFactor = 1024;
-  unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
-  UnpredCost += ScalingUpFactor; // The branch itself
-  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
-
-  return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
+  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
+                             MBB, 0, 0, Probability);
 }
 
 bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB,
+isProfitableToIfCvt(MachineBasicBlock &TBB,
                     unsigned TCycles, unsigned TExtra,
-                    MachineBasicBlock &FMBB,
+                    MachineBasicBlock &FBB,
                     unsigned FCycles, unsigned FExtra,
                     BranchProbability Probability) const {
-  if (!TCycles)
+  if (!TCycles)
     return false;
 
   // Attempt to estimate the relative costs of predication versus branching.
   // Here we scale up each component of UnpredCost to avoid precision issues
   // when scaling TCycles/FCycles by Probability.
   const unsigned ScalingUpFactor = 1024;
-  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
-  unsigned FUnpredCost =
+
+  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
+  unsigned UnpredCost;
+  if (!Subtarget.hasBranchPredictor()) {
+    // When we don't have a branch predictor it's always cheaper not to take a
+    // branch than to take it, so we have to take that into account.
+    unsigned NotTakenBranchCost = 1;
+    unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
+    unsigned TUnpredCycles, FUnpredCycles;
+    if (!FCycles) {
+      // Triangle: TBB is the fallthrough
+      TUnpredCycles = TCycles + NotTakenBranchCost;
+      FUnpredCycles = TakenBranchCost;
+    } else {
+      // Diamond: TBB is the block that is branched to, FBB is the fallthrough
+      TUnpredCycles = TCycles + TakenBranchCost;
+      FUnpredCycles = FCycles + NotTakenBranchCost;
+      // The branch at the end of FBB will disappear when it's predicated, so
+      // discount it from PredCost.
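+      // (For example, with hypothetical TCycles = 2, FCycles = 3 and a
+      // misprediction penalty of 4: TUnpredCycles = 2 + 4, FUnpredCycles =
+      // 3 + 1, while the predicated diamond pays TCycles + FCycles with the
+      // folded FBB branch discounted just below.)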
+      PredCost -= 1 * ScalingUpFactor;
+    }
+    // The total cost is the cost of each path scaled by their probabilities.
+    unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
+    unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
+    UnpredCost = TUnpredCost + FUnpredCost;
+    // When predicating, assume that the first IT can be folded away but later
+    // ones cost one cycle each.
+    if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
+      PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
+    }
+  } else {
+    unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+    unsigned FUnpredCost = Probability.getCompl().scale(FCycles * ScalingUpFactor);
-  unsigned UnpredCost = TUnpredCost + FUnpredCost;
-  UnpredCost += 1 * ScalingUpFactor; // The branch itself
-  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+    UnpredCost = TUnpredCost + FUnpredCost;
+    UnpredCost += 1 * ScalingUpFactor; // The branch itself
+    UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+  }
 
-  return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
+  return PredCost <= UnpredCost;
 }
 
 bool
@@ -1793,7 +1928,6 @@ ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
   return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
 }
 
-
 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
   if (Opc == ARM::B)
     return ARM::Bcc;
@@ -1920,25 +2054,25 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
   const MCInstrDesc &DefDesc = DefMI->getDesc();
   for (unsigned i = 1, e = DefDesc.getNumOperands();
        i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
-    NewMI.addOperand(DefMI->getOperand(i));
+    NewMI.add(DefMI->getOperand(i));
 
   unsigned CondCode = MI.getOperand(3).getImm();
   if (Invert)
     NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
   else
     NewMI.addImm(CondCode);
-  NewMI.addOperand(MI.getOperand(4));
+  NewMI.add(MI.getOperand(4));
 
   // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
   if (NewMI->hasOptionalDef())
-    AddDefaultCC(NewMI);
+    NewMI.add(condCodeOp());
 
   // The output register value when the predicate is false is an implicit
   // register operand tied to the first def.
   // The tie makes the register allocator ensure the FalseReg is allocated the
   // same register as operand 0.
   FalseReg.setImplicit();
-  NewMI.addOperand(FalseReg);
+  NewMI.add(FalseReg);
   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
 
   // Update SeenMIs set: register newly created MI and erase removed DefMI.
@@ -1983,6 +2117,16 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { {ARM::RSBSrsi, ARM::RSBrsi}, {ARM::RSBSrsr, ARM::RSBrsr}, + {ARM::tADDSi3, ARM::tADDi3}, + {ARM::tADDSi8, ARM::tADDi8}, + {ARM::tADDSrr, ARM::tADDrr}, + {ARM::tADCS, ARM::tADC}, + + {ARM::tSUBSi3, ARM::tSUBi3}, + {ARM::tSUBSi8, ARM::tSUBi8}, + {ARM::tSUBSrr, ARM::tSUBrr}, + {ARM::tSBCS, ARM::tSBC}, + {ARM::t2ADDSri, ARM::t2ADDri}, {ARM::t2ADDSrr, ARM::t2ADDrr}, {ARM::t2ADDSrs, ARM::t2ADDrs}, @@ -2011,9 +2155,10 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned MIFlags) { if (NumBytes == 0 && DestReg != BaseReg) { BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) - .addReg(BaseReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg, RegState::Kill) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); return; } @@ -2033,9 +2178,11 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, // Build the new ADD / SUB. unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); BaseReg = DestReg; } } @@ -2154,7 +2301,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, // Add the complete list back in. MachineInstrBuilder MIB(MF, &*MI); for (int i = RegList.size() - 1; i >= 0; --i) - MIB.addOperand(RegList[i]); + MIB.add(RegList[i]); return true; } @@ -2213,33 +2360,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned NumBits = 0; unsigned Scale = 1; switch (AddrMode) { - case ARMII::AddrMode_i12: { + case ARMII::AddrMode_i12: ImmIdx = FrameRegIdx + 1; InstrOffs = MI.getOperand(ImmIdx).getImm(); NumBits = 12; break; - } - case ARMII::AddrMode2: { + case ARMII::AddrMode2: ImmIdx = FrameRegIdx+2; InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs *= -1; NumBits = 12; break; - } - case ARMII::AddrMode3: { + case ARMII::AddrMode3: ImmIdx = FrameRegIdx+2; InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs *= -1; NumBits = 8; break; - } case ARMII::AddrMode4: case ARMII::AddrMode6: // Can't fold any offset even if it's zero. 
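    // (AddrMode4 is the load/store-multiple encoding and AddrMode6 the NEON
    // vld/vst encoding; neither has an immediate offset field to rewrite.)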
return false; - case ARMII::AddrMode5: { + case ARMII::AddrMode5: ImmIdx = FrameRegIdx+1; InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) @@ -2247,7 +2391,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, NumBits = 8; Scale = 4; break; - } default: llvm_unreachable("Unsupported addressing mode!"); } @@ -2401,6 +2544,63 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, return false; } +static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { + switch (MI->getOpcode()) { + default: return false; + case ARM::tLSLri: + case ARM::tLSRri: + case ARM::tLSLrr: + case ARM::tLSRrr: + case ARM::tSUBrr: + case ARM::tADDrr: + case ARM::tADDi3: + case ARM::tADDi8: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tMUL: + IsThumb1 = true; + LLVM_FALLTHROUGH; + case ARM::RSBrr: + case ARM::RSBri: + case ARM::RSCrr: + case ARM::RSCri: + case ARM::ADDrr: + case ARM::ADDri: + case ARM::ADCrr: + case ARM::ADCri: + case ARM::SUBrr: + case ARM::SUBri: + case ARM::SBCrr: + case ARM::SBCri: + case ARM::t2RSBri: + case ARM::t2ADDrr: + case ARM::t2ADDri: + case ARM::t2ADCrr: + case ARM::t2ADCri: + case ARM::t2SUBrr: + case ARM::t2SUBri: + case ARM::t2SBCrr: + case ARM::t2SBCri: + case ARM::ANDrr: + case ARM::ANDri: + case ARM::t2ANDrr: + case ARM::t2ANDri: + case ARM::ORRrr: + case ARM::ORRri: + case ARM::t2ORRrr: + case ARM::t2ORRri: + case ARM::EORrr: + case ARM::EORri: + case ARM::t2EORrr: + case ARM::t2EORri: + case ARM::t2LSRri: + case ARM::t2LSRrr: + case ARM::t2LSLri: + case ARM::t2LSLrr: + return true; + } +} + /// optimizeCompareInstr - Convert the instruction supplying the argument to the /// comparison into one that sets the zero bit in the flags register; /// Remove a redundant Compare instruction if an earlier instruction can set the @@ -2462,6 +2662,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( return false; } + bool IsThumb1 = false; + if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) + return false; + + // We also want to do this peephole for cases like this: if (a*b == 0), + // and optimise away the CMP instruction from the generated code sequence: + // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values + // resulting from the select instruction, but these MOVS instructions for + // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. + // However, if we only have MOVS instructions in between the CMP and the + // other instruction (the MULS in this example), then the CPSR is dead so we + // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this + // reordering and then continue the analysis hoping we can eliminate the + // CMP. This peephole works on the vregs, so is still in SSA form. As a + // consequence, the movs won't redefine/kill the MUL operands which would + // make this reordering illegal. + if (MI && IsThumb1) { + --I; + bool CanReorder = true; + const bool HasStmts = I != E; + for (; I != E; --I) { + if (I->getOpcode() != ARM::tMOVi8) { + CanReorder = false; + break; + } + } + if (HasStmts && CanReorder) { + MI = MI->removeFromParent(); + E = CmpInstr; + CmpInstr.getParent()->insert(E, MI); + } + I = CmpInstr; + E = MI; + } + // Check that CPSR isn't set between the comparison instruction and the one we // want to change. At the same time, search for Sub. 
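  // (If such a Sub computes SrcReg - SrcReg2, its flags can stand in for the
  // compare's, with the users' condition codes swapped when the operands are
  // reversed.)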
const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -2497,183 +2732,128 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( if (isPredicated(*MI)) return false; - bool IsThumb1 = false; - switch (MI->getOpcode()) { - default: break; - case ARM::tLSLri: - case ARM::tLSRri: - case ARM::tLSLrr: - case ARM::tLSRrr: - case ARM::tSUBrr: - case ARM::tADDrr: - case ARM::tADDi3: - case ARM::tADDi8: - case ARM::tSUBi3: - case ARM::tSUBi8: - IsThumb1 = true; - LLVM_FALLTHROUGH; - case ARM::RSBrr: - case ARM::RSBri: - case ARM::RSCrr: - case ARM::RSCri: - case ARM::ADDrr: - case ARM::ADDri: - case ARM::ADCrr: - case ARM::ADCri: - case ARM::SUBrr: - case ARM::SUBri: - case ARM::SBCrr: - case ARM::SBCri: - case ARM::t2RSBri: - case ARM::t2ADDrr: - case ARM::t2ADDri: - case ARM::t2ADCrr: - case ARM::t2ADCri: - case ARM::t2SUBrr: - case ARM::t2SUBri: - case ARM::t2SBCrr: - case ARM::t2SBCri: - case ARM::ANDrr: - case ARM::ANDri: - case ARM::t2ANDrr: - case ARM::t2ANDri: - case ARM::ORRrr: - case ARM::ORRri: - case ARM::t2ORRrr: - case ARM::t2ORRri: - case ARM::EORrr: - case ARM::EORri: - case ARM::t2EORrr: - case ARM::t2EORri: - case ARM::t2LSRri: - case ARM::t2LSRrr: - case ARM::t2LSLri: - case ARM::t2LSLrr: { - // Scan forward for the use of CPSR - // When checking against MI: if it's a conditional code that requires - // checking of the V bit or C bit, then this is not safe to do. - // It is safe to remove CmpInstr if CPSR is redefined or killed. - // If we are done with the basic block, we need to check whether CPSR is - // live-out. - SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> - OperandsToUpdate; - bool isSafe = false; - I = CmpInstr; - E = CmpInstr.getParent()->end(); - while (!isSafe && ++I != E) { - const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); - !isSafe && IO != EO; ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { - isSafe = true; - break; - } - if (!MO.isReg() || MO.getReg() != ARM::CPSR) - continue; - if (MO.isDef()) { - isSafe = true; - break; - } - // Condition code is after the operand before CPSR except for VSELs. - ARMCC::CondCodes CC; - bool IsInstrVSel = true; - switch (Instr.getOpcode()) { - default: - IsInstrVSel = false; - CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); - break; - case ARM::VSELEQD: - case ARM::VSELEQS: - CC = ARMCC::EQ; - break; - case ARM::VSELGTD: - case ARM::VSELGTS: - CC = ARMCC::GT; - break; - case ARM::VSELGED: - case ARM::VSELGES: - CC = ARMCC::GE; - break; - case ARM::VSELVSS: - case ARM::VSELVSD: - CC = ARMCC::VS; - break; - } + // Scan forward for the use of CPSR + // When checking against MI: if it's a conditional code that requires + // checking of the V bit or C bit, then this is not safe to do. + // It is safe to remove CmpInstr if CPSR is redefined or killed. + // If we are done with the basic block, we need to check whether CPSR is + // live-out. 
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> + OperandsToUpdate; + bool isSafe = false; + I = CmpInstr; + E = CmpInstr.getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { + isSafe = true; + break; + } + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } - if (Sub) { - ARMCC::CondCodes NewCC = getSwappedCondition(CC); - if (NewCC == ARMCC::AL) - return false; - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based - // on CMP needs to be updated to be based on SUB. - // Push the condition code operands to OperandsToUpdate. - // If it is safe to remove CmpInstr, the condition code of these - // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) { - // VSel doesn't support condition code update. - if (IsInstrVSel) - return false; - OperandsToUpdate.push_back( - std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); - } - } else { - // No Sub, so this is x = <op> y, z; cmp x, 0. - switch (CC) { - case ARMCC::EQ: // Z - case ARMCC::NE: // Z - case ARMCC::MI: // N - case ARMCC::PL: // N - case ARMCC::AL: // none - // CPSR can be used multiple times, we should continue. - break; - case ARMCC::HS: // C - case ARMCC::LO: // C - case ARMCC::VS: // V - case ARMCC::VC: // V - case ARMCC::HI: // C Z - case ARMCC::LS: // C Z - case ARMCC::GE: // N V - case ARMCC::LT: // N V - case ARMCC::GT: // Z N V - case ARMCC::LE: // Z N V - // The instruction uses the V bit or C bit which is not safe. + if (Sub) { + ARMCC::CondCodes NewCC = getSwappedCondition(CC); + if (NewCC == ARMCC::AL) + return false; + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based + // on CMP needs to be updated to be based on SUB. + // Push the condition code operands to OperandsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // operands will be modified. + if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) return false; - } + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); } - } - } - - // If CPSR is not killed nor re-defined, we should check whether it is - // live-out. If it is live-out, do not optimize. - if (!isSafe) { - MachineBasicBlock *MBB = CmpInstr.getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(ARM::CPSR)) + } else { + // No Sub, so this is x = <op> y, z; cmp x, 0. 
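+        // Only the N and Z flags set by <op> are guaranteed to match those of
+        // CMP x, 0, so any user of C or V below makes the rewrite unsafe.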
+ switch (CC) { + case ARMCC::EQ: // Z + case ARMCC::NE: // Z + case ARMCC::MI: // N + case ARMCC::PL: // N + case ARMCC::AL: // none + // CPSR can be used multiple times, we should continue. + break; + case ARMCC::HS: // C + case ARMCC::LO: // C + case ARMCC::VS: // V + case ARMCC::VC: // V + case ARMCC::HI: // C Z + case ARMCC::LS: // C Z + case ARMCC::GE: // N V + case ARMCC::LT: // N V + case ARMCC::GT: // Z N V + case ARMCC::LE: // Z N V + // The instruction uses the V bit or C bit which is not safe. return false; + } + } } + } - // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always - // set CPSR so this is represented as an explicit output) - if (!IsThumb1) { - MI->getOperand(5).setReg(ARM::CPSR); - MI->getOperand(5).setIsDef(true); - } - assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); - CmpInstr.eraseFromParent(); - - // Modify the condition code of operands in OperandsToUpdate. - // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to - // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. - for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) - OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); - return true; + // If CPSR is not killed nor re-defined, we should check whether it is + // live-out. If it is live-out, do not optimize. + if (!isSafe) { + MachineBasicBlock *MBB = CmpInstr.getParent(); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(ARM::CPSR)) + return false; } + + // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always + // set CPSR so this is represented as an explicit output) + if (!IsThumb1) { + MI->getOperand(5).setReg(ARM::CPSR); + MI->getOperand(5).setIsDef(true); } - - return false; + assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); + CmpInstr.eraseFromParent(); + + // Modify the condition code of operands in OperandsToUpdate. + // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to + // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 
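+  // (For example, a GT user of the CMP becomes an LT user of the SUB, and HI
+  // becomes LO; getSwappedCondition computed these mappings above.)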
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) + OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); + + return true; } bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, @@ -2728,7 +2908,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, switch (UseOpc) { default: break; case ARM::ADDrr: - case ARM::SUBrr: { + case ARM::SUBrr: if (UseOpc == ARM::SUBrr && Commute) return false; @@ -2744,9 +2924,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); break; - } case ARM::ORRrr: - case ARM::EORrr: { + case ARM::EORrr: if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) return false; SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); @@ -2757,9 +2936,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, case ARM::EORrr: NewUseOpc = ARM::EORri; break; } break; - } case ARM::t2ADDrr: - case ARM::t2SUBrr: { + case ARM::t2SUBrr: if (UseOpc == ARM::t2SUBrr && Commute) return false; @@ -2775,9 +2953,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); break; - } case ARM::t2ORRrr: - case ARM::t2EORrr: { + case ARM::t2EORrr: if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) return false; SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); @@ -2789,7 +2966,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } break; } - } } } @@ -2797,11 +2973,12 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); bool isKill = UseMI.getOperand(OpIdx).isKill(); unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); - AddDefaultCC( - AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), - get(NewUseOpc), NewReg) - .addReg(Reg1, getKillRegState(isKill)) - .addImm(SOImmValV1))); + BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), + NewReg) + .addReg(Reg1, getKillRegState(isKill)) + .addImm(SOImmValV1) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); UseMI.setDesc(get(NewUseOpc)); UseMI.getOperand(1).setReg(NewReg); UseMI.getOperand(1).setIsKill(); @@ -3261,6 +3438,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return DefCycle; } +bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { + unsigned BaseReg = MI.getOperand(0).getReg(); + for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { + const auto &Op = MI.getOperand(i); + if (Op.isReg() && Op.getReg() == BaseReg) + return true; + } + return false; +} +unsigned +ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { + // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops + // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops) + return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); +} + int ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -3413,7 +3606,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDMDB: case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: - LdmBypass = 1; + LdmBypass = true; DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); break; } @@ -3888,12 
+4081,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDRs: case ARM::t2LDRBs: case ARM::t2LDRHs: - case ARM::t2LDRSHs: { + case ARM::t2LDRSHs: // Thumb2 mode: lsl 0-3 only. Latency -= 2; break; } - } } if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) @@ -4032,7 +4224,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef())) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. return 1; @@ -4061,7 +4254,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &MCID = MI.getDesc(); - if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { + if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef()))) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. *PredCost = 1; @@ -4180,14 +4374,14 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MachineMemOperand::MOInvariant; MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); - MIB.addMemOperand(MMO); - AddDefaultPred(MIB); + MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); } MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); - MIB.addReg(Reg, RegState::Kill).addImm(0); - MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); - AddDefaultPred(MIB); + MIB.addReg(Reg, RegState::Kill) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) + .add(predOps(ARMCC::AL)); } bool @@ -4222,6 +4416,7 @@ enum ARMExeDomain { ExeVFP = 1, ExeNEON = 2 }; + // // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h // @@ -4345,8 +4540,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) MI.setDesc(get(ARM::VORRd)); - AddDefaultPred( - MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg)); + MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg) + .add(predOps(ARMCC::AL)); break; case ARM::VMOVRS: if (Domain != ExeNEON) @@ -4366,9 +4563,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, // Note that DSrc has been widened and the other lane may be undef, which // contaminates the entire register. MI.setDesc(get(ARM::VGETLNi32)); - AddDefaultPred(MIB.addReg(DstReg, RegState::Define) - .addReg(DReg, RegState::Undef) - .addImm(Lane)); + MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane) + .add(predOps(ARMCC::AL)); // The old source should be an implicit use, otherwise we might think it // was dead before here. @@ -4398,8 +4596,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, MIB.addReg(DReg, RegState::Define) .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) .addReg(SrcReg) - .addImm(Lane); - AddDefaultPred(MIB); + .addImm(Lane) + .add(predOps(ARMCC::AL)); // The narrower destination must be marked as set to keep previous chains // in place. 
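  // (The scalar write into the S register has become a VSETLNi32 into one
  // lane of the containing D register, so the copy now executes entirely in
  // the NEON domain.)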
@@ -4433,8 +4631,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, MI.setDesc(get(ARM::VDUPLN32d)); MIB.addReg(DDst, RegState::Define) .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) - .addImm(SrcLane); - AddDefaultPred(MIB); + .addImm(SrcLane) + .add(predOps(ARMCC::AL)); // Neither the source or the destination are naturally represented any // more, so add them in manually. @@ -4470,10 +4668,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; CurUndef = !MI.readsRegister(CurReg, TRI); - NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); - - NewMIB.addImm(1); - AddDefaultPred(NewMIB); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)) + .addImm(1) + .add(predOps(ARMCC::AL)); if (SrcLane == DstLane) NewMIB.addReg(SrcReg, RegState::Implicit); @@ -4489,10 +4686,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); - MIB.addReg(CurReg, getUndefRegState(CurUndef)); - - MIB.addImm(1); - AddDefaultPred(MIB); + MIB.addReg(CurReg, getUndefRegState(CurUndef)) + .addImm(1) + .add(predOps(ARMCC::AL)); if (SrcLane != DstLane) MIB.addReg(SrcReg, RegState::Implicit); @@ -4505,7 +4701,6 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, break; } } - } //===----------------------------------------------------------------------===// @@ -4613,9 +4808,9 @@ void ARMBaseInstrInfo::breakPartialRegDependency( // Insert the dependency-breaking FCONSTD before MI. // 96 is the encoding of 0.5, but the actual value doesn't matter here. - AddDefaultPred( - BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) - .addImm(96)); + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) + .addImm(96) + .add(predOps(ARMCC::AL)); MI.addRegisterKilled(DReg, TRI, true); } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index b01d5c8..c52e572 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -17,16 +17,21 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/CodeGen.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/Target/TargetInstrInfo.h" +#include <array> +#include <cstdint> #define GET_INSTRINFO_HEADER #include "ARMGenInstrInfo.inc" namespace llvm { - class ARMSubtarget; - class ARMBaseRegisterInfo; + +class ARMBaseRegisterInfo; +class ARMSubtarget; class ARMBaseInstrInfo : public ARMGenInstrInfo { const ARMSubtarget &Subtarget; @@ -100,13 +105,9 @@ public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; - virtual void getNoopForElfTarget(MCInst &NopInst) const { - getNoopForMachoTarget(NopInst); - } - // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
- virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; + virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr &MI, @@ -156,7 +157,25 @@ public: bool DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const override; - bool isPredicable(MachineInstr &MI) const override; + bool isPredicable(const MachineInstr &MI) const override; + + // CPSR defined in instruction + static bool isCPSRDefined(const MachineInstr &MI); + bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const; + bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const; + + // Load, scaled register offset + bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load, scaled register offset, not plus LSL2 + bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const; + // Minus reg for ldstso addr mode + bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const; + // Scaled register offset in address mode 2 + bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load multiple, base reg in list + bool isLDMBaseRegInList(const MachineInstr &MI) const; + // get LDM variable defs size + unsigned getLDMVariableDefsSize(const MachineInstr &MI) const; /// GetInstSize - Returns the size of the specified MachineInstr. /// @@ -399,27 +418,43 @@ public: /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. bool isSwiftFastImmShift(const MachineInstr *MI) const; -}; -static inline -const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { - return MIB.addImm((int64_t)ARMCC::AL).addReg(0); -} + /// Returns predicate register associated with the given frame instruction. + unsigned getFramePred(const MachineInstr &MI) const { + assert(isFrameInstr(MI)); + // Operands of ADJCALLSTACKDOWN/ADJCALLSTACKUP: + // - argument declared in the pattern: + // 0 - frame size + // 1 - arg of CALLSEQ_START/CALLSEQ_END + // 2 - predicate code (like ARMCC::AL) + // - added by predOps: + // 3 - predicate reg + return MI.getOperand(3).getReg(); + } +}; -static inline -const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); +/// Get the operands corresponding to the given \p Pred value. By default, the +/// predicate register is assumed to be 0 (no register), but you can pass in a +/// \p PredReg if that is not the case. +static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred, + unsigned PredReg = 0) { + return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)), + MachineOperand::CreateReg(PredReg, false)}}; } -static inline -const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB, - bool isDead = false) { - return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead)); +/// Get the operand corresponding to the conditional code result. By default, +/// this is 0 (no register). +static inline MachineOperand condCodeOp(unsigned CCReg = 0) { + return MachineOperand::CreateReg(CCReg, false); } -static inline -const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); +/// Get the operand corresponding to the conditional code result for Thumb1. +/// This operand will always refer to CPSR and it will have the Define flag set. +/// You can optionally set the Dead flag by means of \p isDead. 
+static inline MachineOperand t1CondCodeOp(bool isDead = false) { + return MachineOperand::CreateReg(ARM::CPSR, + /*Define*/ true, /*Implicit*/ false, + /*Kill*/ false, isDead); } static inline @@ -517,6 +552,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index d995c63..370c0a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -18,25 +18,35 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <utility> #define DEBUG_TYPE "arm-register-info" @@ -46,7 +56,7 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo() - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {} static unsigned getFramePointerReg(const ARMSubtarget &STI) { return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; @@ -107,7 +117,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && @@ -140,7 +150,6 @@ ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) con return CSR_FPRegs_RegMask; } - const uint32_t * ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -154,7 +163,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, // both or otherwise does not want to enable this optimization, the function // should return NULL if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return nullptr; return STI.isTargetDarwin() ? 
CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; @@ -184,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned R = 0; R < 16; ++R) markSuperRegs(Reserved, ARM::D16 + R); } - const TargetRegisterClass *RC = &ARM::GPRPairRegClass; - for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) - for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) - if (Reserved.test(*SI)) markSuperRegs(Reserved, *I); + const TargetRegisterClass &RC = ARM::GPRPairRegClass; + for (unsigned Reg : RC) + for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) + markSuperRegs(Reserved, Reg); assert(checkAllSuperRegsMarked(Reserved)); return Reserved; @@ -236,11 +246,18 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; + case ARM::tGPRRegClassID: { + // hasFP ends up calling getMaxCallFrameComputed() which may not be + // available when getPressureLimit() is called as part of + // ScheduleDAGRRList. + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 5 - HasFP; + } case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: @@ -299,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(PairedPhys); // Then prefer even or odd registers. - for (unsigned I = 0, E = Order.size(); I != E; ++I) { - unsigned Reg = Order[I]; + for (unsigned Reg : Order) { if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd) continue; // Don't provide hints that are paired to a reserved register. 
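    // (The parity preference lets paired values land in an even/odd register
    // pair, the layout that ldrd/strd require in ARM mode.)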
@@ -425,10 +441,11 @@ void ARMBaseRegisterInfo::emitLoadConstPool( unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx) - .addImm(0).addImm(Pred).addReg(PredReg) - .setMIFlags(MIFlags); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx) + .addImm(0) + .add(predOps(Pred, PredReg)) + .setMIFlags(MIFlags); } bool ARMBaseRegisterInfo:: @@ -474,26 +491,23 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { Scale = 4; break; } - case ARMII::AddrMode2: { + case ARMII::AddrMode2: ImmIdx = Idx+2; InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm()); if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs = -InstrOffs; break; - } - case ARMII::AddrMode3: { + case ARMII::AddrMode3: ImmIdx = Idx+2; InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm()); if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs = -InstrOffs; break; - } - case ARMII::AddrModeT1_s: { + case ARMII::AddrModeT1_s: ImmIdx = Idx+1; InstrOffs = MI->getOperand(ImmIdx).getImm(); Scale = 4; break; - } default: llvm_unreachable("Unsupported addressing mode!"); } @@ -609,7 +623,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(AddDefaultPred(MIB)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -636,7 +650,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, assert(AFI->isThumb2Function()); Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); } - assert (Done && "Unable to resolve frame index!"); + assert(Done && "Unable to resolve frame index!"); (void)Done; } @@ -645,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); unsigned i = 0; - - while (!MI->getOperand(i).isFI()) { - ++i; - assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); - } + for (; !MI->getOperand(i).isFI(); ++i) + assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) @@ -799,7 +810,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, if (!DstSubReg) return true; // Small registers don't frequently cause a problem, so we can coalesce them. 
- if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + if (getRegSizeInBits(*NewRC) < 256 && getRegSizeInBits(*DstRC) < 256 && + getRegSizeInBits(*SrcRC) < 256) return true; auto NewRCWeight = diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 330e153..2e91d9d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -15,24 +15,33 @@ #define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H #include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <cstdint> #define GET_REGINFO_HEADER #include "ARMGenRegisterInfo.inc" namespace llvm { + /// Register allocation hints. namespace ARMRI { + enum { RegPairOdd = 1, RegPairEven = 2 }; -} + +} // end namespace ARMRI /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { using namespace ARM; + switch (Reg) { case R0: case R1: case R2: case R3: case R4: case R5: case R6: case R7: @@ -48,6 +57,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { using namespace ARM; + switch (Reg) { case R8: case R9: case R10: case R11: case R12: // iOS has this second area. @@ -59,6 +69,7 @@ static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { using namespace ARM; + switch (Reg) { case D15: case D14: case D13: case D12: case D11: case D10: case D9: case D8: @@ -87,7 +98,7 @@ protected: /// BasePtr - ARM physical register used as a base ptr in complex stack /// frames. I.e., when we need a 3rd base, not just SP and FP, due to /// variable size stack objects. - unsigned BasePtr; + unsigned BasePtr = ARM::R6; // Can be only subclassed. explicit ARMBaseRegisterInfo(); @@ -198,4 +209,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h index 780544f..e0cb0aa 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H #define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H -#include "ARM.h" -#include "ARMMachineFunctionInfo.h" -using namespace llvm; +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cstdint> namespace llvm { @@ -44,31 +44,30 @@ struct BasicBlockInfo { /// /// Because worst case padding is used, the computed offset of an aligned /// block may not actually be aligned. - unsigned Offset; + unsigned Offset = 0; /// Size - Size of the basic block in bytes. If the block contains /// inline assembly, this is a worst case estimate. /// /// The size does not include any alignment padding whether from the /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; + unsigned Size = 0; /// KnownBits - The number of low bits in Offset that are known to be /// exact. The remaining bits of Offset are an upper bound. 
- uint8_t KnownBits; + uint8_t KnownBits = 0; /// Unalign - When non-zero, the block contains instructions (inline asm) /// of unknown size. The real size may be smaller than Size bytes by a /// multiple of 1 << Unalign. - uint8_t Unalign; + uint8_t Unalign = 0; /// PostAlign - When non-zero, the block terminator contains a .align /// directive, so the end of the block is aligned to 1 << PostAlign /// bytes. - uint8_t PostAlign; + uint8_t PostAlign = 0; - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), - PostAlign(0) {} + BasicBlockInfo() = default; /// Compute the number of known offset bits internally to this block. /// This number should be used to predict worst case padding when @@ -107,4 +106,4 @@ struct BasicBlockInfo { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 52c95b6..051827a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -17,8 +17,11 @@ #include "ARMBaseInstrInfo.h" #include "ARMISelLowering.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -30,25 +33,61 @@ using namespace llvm; ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) : CallLowering(&TLI) {} -static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI, +static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { - EVT VT = TLI.getValueType(DL, T); - if (!VT.isSimple() || !VT.isInteger() || VT.isVector()) + if (T->isArrayTy()) + return true; + + if (T->isStructTy()) { + // For now we only allow homogeneous structs that we can manipulate with + // G_MERGE_VALUES and G_UNMERGE_VALUES + auto StructT = cast<StructType>(T); + for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i) + if (StructT->getElementType(i) != StructT->getElementType(0)) + return false; + return true; + } + + EVT VT = TLI.getValueType(DL, T, true); + if (!VT.isSimple() || VT.isVector() || + !(VT.isInteger() || VT.isFloatingPoint())) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); - return VTSize == 8 || VTSize == 16 || VTSize == 32; + + if (VTSize == 64) + // FIXME: Support i64 too + return VT.isFloatingPoint(); + + return VTSize == 1 || VTSize == 8 || VTSize == 16 || VTSize == 32; } namespace { -struct FuncReturnHandler : public CallLowering::ValueHandler { - FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstrBuilder &MIB) - : ValueHandler(MIRBuilder, MRI), MIB(MIB) {} +/// Helper class for values going out through an ABI boundary (used for handling +/// function return values and call parameters). 
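// [Editor's note] A few worked cases for the isSupportedType() rules above,
// as they stand in this revision:
//
//   i1 / i8 / i16 / i32  -> supported scalar sizes
//   f64                  -> supported (the one 64-bit case allowed)
//   i64                  -> rejected (see the FIXME above)
//   [4 x i32]            -> supported (arrays are accepted unconditionally)
//   { float, float }     -> supported (homogeneous struct elements)
//   { i32, float }       -> rejected at i = 1 (elements differ)
//   <4 x i32>            -> rejected (vectors are excluded)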
+struct OutgoingValueHandler : public CallLowering::ValueHandler { + OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder &MIB, CCAssignFn *AssignFn) + : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0) {} unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - llvm_unreachable("Don't know how to get a stack address yet"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); + + LLT p0 = LLT::pointer(0, 32); + LLT s32 = LLT::scalar(32); + unsigned SPReg = MRI.createGenericVirtualRegister(p0); + MIRBuilder.buildCopy(SPReg, ARM::SP); + + unsigned OffsetReg = MRI.createGenericVirtualRegister(s32); + MIRBuilder.buildConstant(OffsetReg, Offset); + + unsigned AddrReg = MRI.createGenericVirtualRegister(p0); + MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg); + + MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset); + return AddrReg; } void assignValueToReg(unsigned ValVReg, unsigned PhysReg, @@ -56,26 +95,123 @@ struct FuncReturnHandler : public CallLowering::ValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size"); - assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size"); - - assert(VA.getLocInfo() != CCValAssign::SExt && - VA.getLocInfo() != CCValAssign::ZExt && - "ABI extensions not supported yet"); + assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); + assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - MIRBuilder.buildCopy(PhysReg, ValVReg); + unsigned ExtReg = extendRegister(ValVReg, VA); + MIRBuilder.buildCopy(PhysReg, ExtReg); MIB.addUse(PhysReg, RegState::Implicit); } void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - llvm_unreachable("Don't know how to assign a value to an address yet"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); + + unsigned ExtReg = extendRegister(ValVReg, VA); + auto MMO = MIRBuilder.getMF().getMachineMemOperand( + MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(), + /* Alignment */ 0); + MIRBuilder.buildStore(ExtReg, Addr, *MMO); + } + + unsigned assignCustomValue(const CallLowering::ArgInfo &Arg, + ArrayRef<CCValAssign> VAs) override { + CCValAssign VA = VAs[0]; + assert(VA.needsCustom() && "Value doesn't need custom handling"); + assert(VA.getValVT() == MVT::f64 && "Unsupported type"); + + CCValAssign NextVA = VAs[1]; + assert(NextVA.needsCustom() && "Value doesn't need custom handling"); + assert(NextVA.getValVT() == MVT::f64 && "Unsupported type"); + + assert(VA.getValNo() == NextVA.getValNo() && + "Values belong to different arguments"); + + assert(VA.isRegLoc() && "Value should be in reg"); + assert(NextVA.isRegLoc() && "Value should be in reg"); + + unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), + MRI.createGenericVirtualRegister(LLT::scalar(32))}; + MIRBuilder.buildUnmerge(NewRegs, Arg.Reg); + + bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle(); + if (!IsLittle) + std::swap(NewRegs[0], NewRegs[1]); + + assignValueToReg(NewRegs[0], VA.getLocReg(), VA); + assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA); + + return 1; + } + + bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo 
LocInfo, + const CallLowering::ArgInfo &Info, CCState &State) override { + if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State)) + return true; + + StackSize = + std::max(StackSize, static_cast<uint64_t>(State.getNextStackOffset())); + return false; } MachineInstrBuilder &MIB; + uint64_t StackSize; }; } // End anonymous namespace. +void ARMCallLowering::splitToValueTypes( + const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs, + MachineFunction &MF, const SplitArgTy &PerformArgSplit) const { + const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>(); + LLVMContext &Ctx = OrigArg.Ty->getContext(); + const DataLayout &DL = MF.getDataLayout(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const Function *F = MF.getFunction(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); + + if (SplitVTs.size() == 1) { + // Even if there is no splitting to do, we still want to replace the + // original type (e.g. pointer type -> integer). + auto Flags = OrigArg.Flags; + unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); + Flags.setOrigAlign(OriginalAlignment); + SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags, + OrigArg.IsFixed); + return; + } + + unsigned FirstRegIdx = SplitArgs.size(); + for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { + EVT SplitVT = SplitVTs[i]; + Type *SplitTy = SplitVT.getTypeForEVT(Ctx); + auto Flags = OrigArg.Flags; + + unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); + Flags.setOrigAlign(OriginalAlignment); + + bool NeedsConsecutiveRegisters = + TLI.functionArgumentNeedsConsecutiveRegisters( + SplitTy, F->getCallingConv(), F->isVarArg()); + if (NeedsConsecutiveRegisters) { + Flags.setInConsecutiveRegs(); + if (i == e - 1) + Flags.setInConsecutiveRegsLast(); + } + + SplitArgs.push_back( + ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)), + SplitTy, Flags, OrigArg.IsFixed}); + } + + for (unsigned i = 0; i < Offsets.size(); ++i) + PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8); +} + /// Lower the return value for the already existing \p Ret. This assumes that /// \p MIRBuilder's insertion point is correct. 
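// [Editor's note] To make splitToValueTypes() above concrete: for an argument
// of IR type { i32, i32 }, ComputeValueVTs() produces SplitVTs = {i32, i32}
// and byte Offsets = {0, 4}, so two fresh vregs are created and the callback
// runs as
//
//   PerformArgSplit(SplitArgs[FirstRegIdx + 0].Reg, 0);    // 0 * 8
//   PerformArgSplit(SplitArgs[FirstRegIdx + 1].Reg, 32);   // 4 * 8
//
// Note the "Offsets[i] * 8": the callback receives bit offsets, not bytes.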
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, @@ -93,21 +229,29 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, if (!isSupportedType(DL, TLI, Val->getType())) return false; + SmallVector<ArgInfo, 4> SplitVTs; + SmallVector<unsigned, 4> Regs; + ArgInfo RetInfo(VReg, Val->getType()); + setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); + splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) { + Regs.push_back(Reg); + }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, VReg); + CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); - ArgInfo RetInfo(VReg, Val->getType()); - setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F); - - FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret); - return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler); + OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn); + return handleAssignments(MIRBuilder, SplitVTs, RetHandler); } bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg) const { assert(!Val == !VReg && "Return value without a vreg"); - auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET)); + auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL)); if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret)) return false; @@ -117,13 +261,17 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, } namespace { -struct FormalArgHandler : public CallLowering::ValueHandler { - FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) - : ValueHandler(MIRBuilder, MRI) {} +/// Helper class for values coming in through an ABI boundary (used for handling +/// formal arguments and call return values). +struct IncomingValueHandler : public CallLowering::ValueHandler { + IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn AssignFn) + : ValueHandler(MIRBuilder, MRI, AssignFn) {} unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - assert(Size == 4 && "Unsupported size"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); auto &MFI = MIRBuilder.getMF().getFrameInfo(); @@ -139,11 +287,30 @@ struct FormalArgHandler : public CallLowering::ValueHandler { void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - assert(Size == 4 && "Unsupported size"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); + + if (VA.getLocInfo() == CCValAssign::SExt || + VA.getLocInfo() == CCValAssign::ZExt) { + // If the value is zero- or sign-extended, its size becomes 4 bytes, so + // that's what we should load. + Size = 4; + assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm"); + + auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); + buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO); + MIRBuilder.buildTrunc(ValVReg, LoadVReg); + } else { + // If the value is not extended, a simple load will suffice. 
+ buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO); + } + } + void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment, + MachinePointerInfo &MPO) { auto MMO = MIRBuilder.getMF().getMachineMemOperand( - MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0); - MIRBuilder.buildLoad(ValVReg, Addr, *MMO); + MPO, MachineMemOperand::MOLoad, Size, Alignment); + MIRBuilder.buildLoad(Val, Addr, *MMO); } void assignValueToReg(unsigned ValVReg, unsigned PhysReg, @@ -151,12 +318,60 @@ struct FormalArgHandler : public CallLowering::ValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size"); - assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size"); + assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); + assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - MIRBuilder.getMBB().addLiveIn(PhysReg); + // The necessary extensions are handled on the other side of the ABI + // boundary. + markPhysRegUsed(PhysReg); MIRBuilder.buildCopy(ValVReg, PhysReg); } + + unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg, + ArrayRef<CCValAssign> VAs) override { + CCValAssign VA = VAs[0]; + assert(VA.needsCustom() && "Value doesn't need custom handling"); + assert(VA.getValVT() == MVT::f64 && "Unsupported type"); + + CCValAssign NextVA = VAs[1]; + assert(NextVA.needsCustom() && "Value doesn't need custom handling"); + assert(NextVA.getValVT() == MVT::f64 && "Unsupported type"); + + assert(VA.getValNo() == NextVA.getValNo() && + "Values belong to different arguments"); + + assert(VA.isRegLoc() && "Value should be in reg"); + assert(NextVA.isRegLoc() && "Value should be in reg"); + + unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), + MRI.createGenericVirtualRegister(LLT::scalar(32))}; + + assignValueToReg(NewRegs[0], VA.getLocReg(), VA); + assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA); + + bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle(); + if (!IsLittle) + std::swap(NewRegs[0], NewRegs[1]); + + MIRBuilder.buildMerge(Arg.Reg, NewRegs); + + return 1; + } + + /// Marking a physical register as used is different between formal + /// parameters, where it's a basic block live-in, and call returns, where it's + /// an implicit-def of the call instruction. 
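// [Editor's note] assignCustomValue() above (and its outgoing twin earlier in
// this file) handles the calling conventions that pass an f64 in two
// consecutive 32-bit core registers (e.g. r0/r1). In pseudo-steps for the
// incoming direction, using names from the code above:
//
//   NewRegs[0] <- copy from VA.getLocReg();      // first core register
//   NewRegs[1] <- copy from NextVA.getLocReg();  // second core register
//   if (!Subtarget.isLittle()) std::swap(NewRegs[0], NewRegs[1]);
//   Arg.Reg    <- G_MERGE_VALUES NewRegs[0], NewRegs[1];
//
// The swap is what keeps big-endian targets from merging the halves in the
// wrong order.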
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0; +}; + +struct FormalArgHandler : public IncomingValueHandler { + FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn AssignFn) + : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} + + void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMBB().addLiveIn(PhysReg); + } }; } // End anonymous namespace @@ -170,34 +385,150 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, if (F.isVarArg()) return false; - auto DL = MIRBuilder.getMF().getDataLayout(); + auto &MF = MIRBuilder.getMF(); + auto &MBB = MIRBuilder.getMBB(); + auto DL = MF.getDataLayout(); auto &TLI = *getTLI<ARMTargetLowering>(); - auto &Args = F.getArgumentList(); - unsigned ArgIdx = 0; - for (auto &Arg : Args) { - ArgIdx++; - if (!isSupportedType(DL, TLI, Arg.getType())) - return false; + auto Subtarget = TLI.getSubtarget(); + + if (Subtarget->isThumb()) + return false; - // FIXME: This check as well as ArgIdx are going away as soon as we support - // loading values < 32 bits. - if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32) + for (auto &Arg : F.args()) + if (!isSupportedType(DL, TLI, Arg.getType())) return false; - } CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); + FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), + AssignFn); + SmallVector<ArgInfo, 8> ArgInfos; + SmallVector<unsigned, 4> SplitRegs; unsigned Idx = 0; - for (auto &Arg : Args) { + for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); - setArgFlags(AInfo, Idx + 1, DL, F); - ArgInfos.push_back(AInfo); + setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); + + SplitRegs.clear(); + + splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + }); + + if (!SplitRegs.empty()) + MIRBuilder.buildMerge(VRegs[Idx], SplitRegs); + Idx++; } - FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo()); - return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler); + if (!MBB.empty()) + MIRBuilder.setInstr(*MBB.begin()); + + return handleAssignments(MIRBuilder, ArgInfos, ArgHandler); +} + +namespace { +struct CallReturnHandler : public IncomingValueHandler { + CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB, CCAssignFn *AssignFn) + : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} + + void markPhysRegUsed(unsigned PhysReg) override { + MIB.addDef(PhysReg, RegState::Implicit); + } + + MachineInstrBuilder MIB; +}; +} // End anonymous namespace. + +bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallingConv::ID CallConv, + const MachineOperand &Callee, + const ArgInfo &OrigRet, + ArrayRef<ArgInfo> OrigArgs) const { + MachineFunction &MF = MIRBuilder.getMF(); + const auto &TLI = *getTLI<ARMTargetLowering>(); + const auto &DL = MF.getDataLayout(); + const auto &STI = MF.getSubtarget(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (MF.getSubtarget<ARMSubtarget>().genLongCalls()) + return false; + + auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN); + + // Create the call instruction so we can add the implicit uses of arg + // registers, but don't insert it yet. 
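// [Editor's note] buildInstrNoInsert()/insertInstr() is the MachineIRBuilder
// pattern for instructions whose final operand list is not known up front:
// create the instruction detached, let the value handler append implicit
// register uses while the arguments are lowered, then splice it in. A reduced
// sketch of the flow used below (lowerCallArgs is a hypothetical stand-in for
// the handleAssignments() call):
//
//   auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX); // in no MBB yet
//   lowerCallArgs(MIB);              // may add implicit-use operands
//   MIRBuilder.insertInstr(MIB);     // now placed at the insertion point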
+ auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( + TRI->getCallPreservedMask(MF, CallConv)); + if (Callee.isReg()) { + auto CalleeReg = Callee.getReg(); + if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), + *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0)); + } + + SmallVector<ArgInfo, 8> ArgInfos; + for (auto Arg : OrigArgs) { + if (!isSupportedType(DL, TLI, Arg.Ty)) + return false; + + if (!Arg.IsFixed) + return false; + + SmallVector<unsigned, 8> Regs; + splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + Regs.push_back(Reg); + }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, Arg.Reg); + } + + auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); + OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn); + if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler)) + return false; + + // Now we can add the actual call instruction to the correct basic block. + MIRBuilder.insertInstr(MIB); + + if (!OrigRet.Ty->isVoidTy()) { + if (!isSupportedType(DL, TLI, OrigRet.Ty)) + return false; + + ArgInfos.clear(); + SmallVector<unsigned, 8> SplitRegs; + splitToValueTypes(OrigRet, ArgInfos, MF, + [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + }); + + auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false); + CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); + if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) + return false; + + if (!SplitRegs.empty()) { + // We have split the value and allocated each individual piece, now build + // it up again. + MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs); + } + } + + // We now know the size of the stack - update the ADJCALLSTACKDOWN + // accordingly. + CallSeqStart.addImm(ArgHandler.StackSize).addImm(0).add(predOps(ARMCC::AL)); + + MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP) + .addImm(ArgHandler.StackSize) + .addImm(0) + .add(predOps(ARMCC::AL)); + + return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h index 6a1b886..f5a6872 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h @@ -34,9 +34,22 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<unsigned> VRegs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, + const MachineOperand &Callee, const ArgInfo &OrigRet, + ArrayRef<ArgInfo> OrigArgs) const override; + private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg, MachineInstrBuilder &Ret) const; + + typedef std::function<void(unsigned Reg, uint64_t Offset)> SplitArgTy; + + /// Split an argument into one or more arguments that the CC lowering can cope + /// with (e.g. replace pointers with integers). 
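// [Editor's note] Every SplitArgTy callback in this patch follows the same
// pattern: collect the pieces, then merge or unmerge around the assignment.
// A usage sketch taken from the shape of the callers above:
//
//   SmallVector<unsigned, 8> Regs;
//   splitToValueTypes(Arg, ArgInfos, MF,
//                     [&](unsigned Reg, uint64_t Offset) {
//                       Regs.push_back(Reg);
//                     });
//   if (Regs.size() > 1)
//     MIRBuilder.buildUnmerge(Regs, Arg.Reg);   // outgoing direction
//
// Incoming values invert the last step with MIRBuilder.buildMerge().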
+ void splitToValueTypes(const ArgInfo &OrigArg, + SmallVectorImpl<ArgInfo> &SplitArgs, + MachineFunction &MF, + const SplitArgTy &PerformArgSplit) const; }; } // End of namespace llvm #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 7a7b7fe..bc7afdb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; -def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, - (sequence "R%u", 12, 1), - (sequence "D%u", 31, 0))>; +def CSR_iOS_TLSCall + : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12), + (sequence "D%u", 31, 0))>; // C++ TLS access function saves all registers except SP. Try to match // the order of CSRs in CSR_iOS. diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp index 64f187d..e145d0a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp @@ -8,7 +8,15 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBasicBlockInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <vector> + using namespace llvm; namespace llvm { @@ -69,4 +77,4 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) { return BBInfo; } -} // end namespace +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index be1a37e..667337d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -14,30 +14,50 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBasicBlockInfo.h" #include "ARMMachineFunctionInfo.h" -#include "MCTargetDesc/ARMAddressingModes.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <new> +#include <utility> +#include <vector> + using namespace llvm; #define 
DEBUG_TYPE "arm-cp-islands" +#define ARM_CP_ISLANDS_OPT_NAME \ + "ARM constant island placement and branch shortening pass" STATISTIC(NumCPEs, "Number of constpool entries"); STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); @@ -49,7 +69,6 @@ STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); - static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); @@ -64,6 +83,7 @@ static cl::opt<bool> SynthesizeThumb1TBB( "equivalent to the TBB/TBH instructions")); namespace { + /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions /// inside a function. To do this, it completely ignores the normal LLVM @@ -76,7 +96,6 @@ namespace { /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. class ARMConstantIslands : public MachineFunctionPass { - std::vector<BasicBlockInfo> BBInfo; /// WaterList - A sorted list of basic blocks where islands could be placed @@ -110,12 +129,14 @@ namespace { bool NegOk; bool IsSoImm; bool KnownAlignment; + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), KnownAlignment(false) { HighWaterMark = CPEMI->getParent(); } + /// getMaxDisp - Returns the maximum displacement supported by MI. /// Correct for unknown alignment. /// Conservatively subtract 2 bytes to handle weird alignment effects. @@ -135,6 +156,7 @@ namespace { MachineInstr *CPEMI; unsigned CPI; unsigned RefCount; + CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} }; @@ -148,7 +170,7 @@ namespace { /// The first half of CPEntries contains generic constants, the second half /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up /// which vector it will be in here. - std::vector<std::vector<CPEntry> > CPEntries; + std::vector<std::vector<CPEntry>> CPEntries; /// Maps a JT index to the offset in CPEntries containing copies of that /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity. 
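// [Editor's note] ARM_CP_ISLANDS_OPT_NAME (defined above) exists so one
// string literal serves both the pass's getPassName() override and the
// INITIALIZE_PASS() registration added at the bottom of this file. The
// generic shape of that idiom:
//
//   #define MY_PASS_NAME "my pass description"
//   struct MyPass : MachineFunctionPass {
//     StringRef getPassName() const override { return MY_PASS_NAME; }
//   };
//   INITIALIZE_PASS(MyPass, "my-pass-flag", MY_PASS_NAME, false, false)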
@@ -167,6 +189,7 @@ namespace { unsigned MaxDisp : 31; bool isCond : 1; unsigned UncondBr; + ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr) : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} }; @@ -195,8 +218,10 @@ namespace { bool isThumb1; bool isThumb2; bool isPositionIndependentOrROPI; + public: static char ID; + ARMConstantIslands() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -207,7 +232,7 @@ namespace { } StringRef getPassName() const override { - return "ARM constant island placement and branch shortening pass"; + return ARM_CP_ISLANDS_OPT_NAME; } private: @@ -264,8 +289,10 @@ namespace { U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; + char ARMConstantIslands::ID = 0; -} + +} // end anonymous namespace /// verify - check BBOffsets, BBSizes, alignment of islands void ARMConstantIslands::verify() { @@ -295,8 +322,9 @@ void ARMConstantIslands::verify() { #endif } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// print block size and offset information - debugging -void ARMConstantIslands::dumpBBs() { +LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { DEBUG({ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; @@ -308,12 +336,7 @@ void ARMConstantIslands::dumpBBs() { } }); } - -/// createARMConstantIslandPass - returns an instance of the constpool -/// island pass. -FunctionPass *llvm::createARMConstantIslandPass() { - return new ARMConstantIslands(); -} +#endif bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF = &mf; @@ -782,6 +805,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { case ARM::LDRcp: case ARM::t2LDRpci: case ARM::t2LDRHpci: + case ARM::t2LDRBpci: Bits = 12; // +-offset_12 NegOk = true; break; @@ -873,7 +897,6 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { WaterList.insert(IP, NewBB); } - /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. @@ -897,8 +920,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { if (!isThumb) BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); else - BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB) - .addImm(ARMCC::AL).addReg(0); + BuildMI(OrigBB, DebugLoc(), TII->get(Opc)) + .addMBB(NewBB) + .add(predOps(ARMCC::AL)); ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -1296,8 +1320,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (!isThumb) BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); else - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) - .addImm(ARMCC::AL).addReg(0); + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)) + .addMBB(NewMBB) + .add(predOps(ARMCC::AL)); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); @@ -1477,7 +1502,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, // add it to the island. 
U.HighWaterMark = NewIsland; U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc()) - .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size); + .addImm(ID) + .add(CPEMI->getOperand(1)) + .addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; @@ -1681,8 +1708,9 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { Br.MI = &MBB->back(); BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); if (isThumb) - BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) - .addImm(ARMCC::AL).addReg(0); + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)) + .addMBB(DestBB) + .add(predOps(ARMCC::AL)); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); @@ -1709,8 +1737,14 @@ bool ARMConstantIslands::undoLRSpillRestore() { MI->getNumExplicitOperands() == 3) { // Create the new insn and copy the predicate from the old. BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)); + .add(MI->getOperand(0)) + .add(MI->getOperand(1)); + MI->eraseFromParent(); + MadeChange = true; + } else if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { + // Just remove the push. MI->eraseFromParent(); MadeChange = true; } @@ -1792,13 +1826,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() { Bits = 11; Scale = 2; break; - case ARM::t2Bcc: { + case ARM::t2Bcc: NewOpc = ARM::tBcc; Bits = 8; Scale = 2; break; } - } if (NewOpc) { unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); @@ -1983,6 +2016,54 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) { &*MBB->begin() == CPEMI; } +static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI, + MachineInstr *JumpMI, + unsigned &DeadSize) { + // Remove a dead add between the LEA and JT, which used to compute EntryReg, + // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg + // and is not clobbered / used. + MachineInstr *RemovableAdd = nullptr; + unsigned EntryReg = JumpMI->getOperand(0).getReg(); + + // Find the last ADD to set EntryReg + MachineBasicBlock::iterator I(LEAMI); + for (++I; &*I != JumpMI; ++I) { + if (I->getOpcode() == ARM::t2ADDrs && I->getOperand(0).getReg() == EntryReg) + RemovableAdd = &*I; + } + + if (!RemovableAdd) + return; + + // Ensure EntryReg is not clobbered or used. + MachineBasicBlock::iterator J(RemovableAdd); + for (++J; &*J != JumpMI; ++J) { + for (unsigned K = 0, E = J->getNumOperands(); K != E; ++K) { + const MachineOperand &MO = J->getOperand(K); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() == EntryReg) + return; + if (MO.isUse() && MO.getReg() == EntryReg) + return; + } + } + + DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd); + RemovableAdd->eraseFromParent(); + DeadSize += 4; +} + +static bool registerDefinedBetween(unsigned Reg, + MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To, + const TargetRegisterInfo *TRI) { + for (auto I = From; I != To; ++I) + if (I->modifiesRegister(Reg, TRI)) + return true; + return false; +} + /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. 
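// [Editor's note] registerDefinedBetween() above scans the half-open range
// [From, To): To itself is never inspected. So a guard in the function below
// such as
//
//   registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)
//
// asks "is BaseReg clobbered strictly after the load, anywhere up to the end
// of the block?", which is exactly the hazard the new comments there spell
// out before the jump is rewritten to use TBB/TBH.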
bool ARMConstantIslands::optimizeThumb2JumpTables() { @@ -2060,6 +2141,12 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { IdxReg = Shift->getOperand(2).getReg(); unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); + // It's important that IdxReg is live until the actual TBB/TBH. Most of + // the range is checked later, but the LEA might still clobber it and not + // actually get removed. + if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI)) + continue; + MachineInstr *Load = User.MI->getNextNode(); if (Load->getOpcode() != ARM::tLDRr) continue; @@ -2069,6 +2156,16 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { continue; // If we're in PIC mode, there should be another ADD following. + auto *TRI = STI->getRegisterInfo(); + + // %base cannot be redefined after the load as it will appear before + // TBB/TBH like: + // %base = + // %base = + // tBB %base, %idx + if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) + continue; + if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || @@ -2078,22 +2175,26 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { continue; if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg()) continue; - + if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI)) + // IdxReg gets redefined in the middle of the sequence. + continue; Add->eraseFromParent(); DeadSize += 2; } else { if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg()) continue; + if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI)) + // IdxReg gets redefined in the middle of the sequence. + continue; } - - + // Now safe to delete the load and lsl. The LEA will be removed later. CanDeleteLEA = true; Shift->eraseFromParent(); Load->eraseFromParent(); DeadSize += 4; } - + DEBUG(dbgs() << "Shrink JT: " << *MI); MachineInstr *CPEMI = User.CPEMI; unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; @@ -2117,7 +2218,10 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { NewJTMI->getOperand(0).setReg(ARM::PC); NewJTMI->getOperand(0).setIsKill(false); - if (CanDeleteLEA) { + if (CanDeleteLEA) { + if (isThumb2) + RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize); + User.MI->eraseFromParent(); DeadSize += isThumb2 ? 4 : 2; @@ -2238,13 +2342,11 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { if (isThumb2) BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)) .addMBB(BB) - .addImm(ARMCC::AL) - .addReg(0); + .add(predOps(ARMCC::AL)); else BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB)) .addMBB(BB) - .addImm(ARMCC::AL) - .addReg(0); + .add(predOps(ARMCC::AL)); // Update internal data structures to account for the newly inserted MBB. MF->RenumberBlocks(NewBB); @@ -2256,3 +2358,12 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { ++NumJTInserted; return NewBB; } + +/// createARMConstantIslandPass - returns an instance of the constpool +/// island pass. 
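// [Editor's note] RemoveDeadAddBetweenLEAAndJT(), called above once
// CanDeleteLEA fires on Thumb2, targets this shape (schematic; the exact
// LEA and jump opcodes vary):
//
//   %entry = <LEA ...>                  ; LEAMI
//   %entry = t2ADDrs %entry, %idx, ...  ; the ADD being searched for
//   <jump> %entry, ...                  ; JumpMI, rewritten to read PC
//
// Once the jump reads PC, the ADD's only use is gone, so it can be erased
// provided %entry is neither redefined nor read again in between;
// "DeadSize += 4" accounts for the removed 32-bit Thumb2 ADD.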
+FunctionPass *llvm::createARMConstantIslandPass() { + return new ARMConstantIslands(); +} + +INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME, + false, false) diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 2d16028..9705c8b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -13,13 +13,17 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <cstdlib> + using namespace llvm; //===----------------------------------------------------------------------===// @@ -44,7 +48,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id, LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier), AddCurrentAddress(addCurrentAddress) {} -ARMConstantPoolValue::~ARMConstantPoolValue() {} +ARMConstantPoolValue::~ARMConstantPoolValue() = default; StringRef ARMConstantPoolValue::getModifierText() const { switch (Modifier) { @@ -94,9 +98,11 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { return false; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const { errs() << " " << *this; } +#endif void ARMConstantPoolValue::print(raw_ostream &O) const { if (Modifier) O << "(" << getModifierText() << ")"; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 5f61832..61c5215 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -14,10 +14,11 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H #define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" -#include <cstddef> +#include <string> +#include <vector> namespace llvm { @@ -29,6 +30,7 @@ class LLVMContext; class MachineBasicBlock; namespace ARMCP { + enum ARMCPKind { CPValue, CPExtSymbol, @@ -47,7 +49,8 @@ namespace ARMCP { SECREL, /// Section Relative (Windows TLS) SBREL, /// Static Base Relative (RWPI) }; -} + +} // end namespace ARMCP /// ARMConstantPoolValue - ARM specific constantpool value. 
This is used to /// represent PC-relative displacement between the address of the load @@ -169,9 +172,11 @@ public: const GlobalValue *getGV() const; const BlockAddress *getBlockAddress() const; + const GlobalVariable *getPromotedGlobal() const { return dyn_cast_or_null<GlobalVariable>(GVar); } + const Constant *getPromotedGlobalInit() const { return CVal; } @@ -186,6 +191,7 @@ public: void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; void print(raw_ostream &O) const override; + static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() || APV->isPromotedGlobal(); @@ -267,6 +273,6 @@ public: } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index baa4e03..46d8f0d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,6 +19,7 @@ #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -30,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-pseudo" @@ -97,9 +99,9 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, const MachineOperand &MO = OldMI.getOperand(i); assert(MO.isReg() && MO.getReg()); if (MO.isUse()) - UseMI.addOperand(MO); + UseMI.add(MO); else - DefMI.addOperand(MO); + DefMI.add(MO); } } @@ -415,14 +417,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); if (TableEntry->isUpdating) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // For an instruction writing double-spaced subregs, the pseudo instruction // has an extra operand that is a use of the super-register. Record the @@ -432,15 +434,15 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { SrcOpIdx = OpIdx++; // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the super-register source operand used for double-spaced subregs over // to the new instruction as an implicit operand. if (SrcOpIdx != 0) { MachineOperand MO = MI.getOperand(SrcOpIdx); MO.setImplicit(true); - MIB.addOperand(MO); + MIB.add(MO); } // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); @@ -467,14 +469,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; if (TableEntry->isUpdating) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. 
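// [Editor's note] The addOperand() -> add() churn through these Expand*
// helpers is a pure spelling change: both append a copy of an existing
// MachineOperand to the instruction under construction. The recurring
// before/after pair, as in the lines below:
//
//   MIB.addOperand(MI.getOperand(OpIdx++));   // old API
//   MIB.add(MI.getOperand(OpIdx++));          // new API, same behavior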
- MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); @@ -490,8 +492,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); @@ -549,14 +551,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { } if (TableEntry->isUpdating) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Grab the super-register source. MachineOperand MO = MI.getOperand(OpIdx++); @@ -579,12 +581,12 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { OpIdx += 1; // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the super-register source to be an implicit source. MO.setImplicit(true); - MIB.addOperand(MO); + MIB.add(MO); if (TableEntry->IsLoad) // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); @@ -605,9 +607,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned OpIdx = 0; // Transfer the destination register operand. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); if (IsExt) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); @@ -616,11 +618,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addReg(D0); // Copy the other source register operand. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Add an implicit kill and use for the super-reg. 
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill)); @@ -696,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16 = HI16.addImm(SOImmValV2); LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - LO16.addImm(Pred).addReg(PredReg).addReg(0); - HI16.addImm(Pred).addReg(PredReg).addReg(0); + LO16.addImm(Pred).addReg(PredReg).add(condCodeOp()); + HI16.addImm(Pred).addReg(PredReg).add(condCodeOp()); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); return; @@ -755,14 +757,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MI.eraseFromParent(); } -static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MBB->addLiveIn(*I); -} - /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as -/// possible. This only gets used at -O0 so we don't care about efficiency of the -/// generated code. +/// possible. This only gets used at -O0 so we don't care about efficiency of +/// the generated code. bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdrexOp, unsigned StrexOp, @@ -771,16 +768,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, bool IsThumb = STI->isThumb(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + const MachineOperand &Dest = MI.getOperand(0); + unsigned TempReg = MI.getOperand(1).getReg(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + unsigned NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -793,33 +788,30 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, if (UxtOp) { MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg()) - .addReg(Desired.getReg(), RegState::Kill); + BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg) + .addReg(DesiredReg, RegState::Kill); if (!IsThumb) MIB.addImm(0); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } // .Lloadcmp: // ldrex rDest, [rAddr] // cmp rDest, rDesired // bne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); - MIB.addReg(Addr.getReg()); + MIB.addReg(AddrReg); if (LdrexOp == ARM::t2LDREX) MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); unsigned CMPrr = IsThumb ? 
ARM::tCMPhir : ARM::CMPrr; - AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) - .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .addOperand(Desired)); + BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) + .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) + .addReg(DesiredReg) + .add(predOps(ARMCC::AL)); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; BuildMI(LoadCmpBB, DL, TII->get(Bcc)) .addMBB(DoneBB) @@ -829,25 +821,21 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strex rStatus, rNew, [rAddr] - // cmp rStatus, #0 + // strex rTempReg, rNew, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - - - MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg); - MIB.addOperand(New); - MIB.addOperand(Addr); + MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg) + .addReg(NewReg) + .addReg(AddrReg); if (StrexOp == ARM::t2STREX) MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; - AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) - .addImm(0)); + BuildMI(StoreBB, DL, TII->get(CMPri)) + .addReg(TempReg, RegState::Kill) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) .addMBB(LoadCmpBB) .addImm(ARMCC::NE) @@ -857,12 +845,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -889,20 +889,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); + unsigned TempReg = MI.getOperand(1).getReg(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. 
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + MachineOperand New = MI.getOperand(4); + New.setIsKill(false); unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); - unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0); - unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); + unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -916,28 +915,23 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // .Lloadcmp: // ldrexd rDestLo, rDestHi, [rAddr] // cmp rDestLo, rDesiredLo - // sbcs rStatus<dead>, rDestHi, rDesiredHi + // sbcs rTempReg<dead>, rDestHi, rDesiredHi // bne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); - MIB.addReg(Addr.getReg()); - AddDefaultPred(MIB); + MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; - AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) - .addReg(DestLo, getKillRegState(Dest.isDead())) - .addReg(DesiredLo, getKillRegState(Desired.isDead()))); + BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) + .addReg(DestLo, getKillRegState(Dest.isDead())) + .addReg(DesiredLo) + .add(predOps(ARMCC::AL)); BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(DestHi, getKillRegState(Dest.isDead())) - .addReg(DesiredHi, getKillRegState(Desired.isDead())) + .addReg(DesiredHi) .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; @@ -949,23 +943,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strexd rStatus, rNewLo, rNewHi, [rAddr] - // cmp rStatus, #0 + // strexd rTempReg, rNewLo, rNewHi, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; - MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); + MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); - MIB.addOperand(Addr); - AddDefaultPred(MIB); + MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? 
ARM::t2CMPri : ARM::CMPri; - AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) - .addImm(0)); + BuildMI(StoreBB, DL, TII->get(CMPri)) + .addReg(TempReg, RegState::Kill) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) .addMBB(LoadCmpBB) .addImm(ARMCC::NE) @@ -975,12 +965,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -1026,7 +1028,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Add the default predicate in Thumb mode. if (STI->isThumb()) - MIB.addImm(ARMCC::AL).addReg(0); + MIB.add(predOps(ARMCC::AL)); } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) @@ -1047,9 +1049,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)); + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)); MI.eraseFromParent(); return true; @@ -1059,10 +1061,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVr : ARM::MOVr; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1070,11 +1072,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVCCsi: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), (MI.getOperand(1).getReg())) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) - .addImm(MI.getOperand(4).getImm()) // 'pred' - .addOperand(MI.getOperand(5)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .add(MI.getOperand(5)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1082,12 +1084,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVCCsr: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) - .addOperand(MI.getOperand(2)) - .addOperand(MI.getOperand(3)) - .addImm(MI.getOperand(4).getImm()) - .addImm(MI.getOperand(5).getImm()) // 'pred' - .addOperand(MI.getOperand(6)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .addImm(MI.getOperand(4).getImm()) + .addImm(MI.getOperand(5).getImm()) // 'pred' + .add(MI.getOperand(6)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1097,9 +1099,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)); + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)); MI.eraseFromParent(); return true; } @@ -1108,10 +1110,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)) - .addReg(0); // 's' bit + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1121,10 +1123,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MVNi : ARM::MVNi; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)) - .addReg(0); // 's' bit + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1143,11 +1145,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) - .addImm(MI.getOperand(4).getImm()) // 'pred' - .addOperand(MI.getOperand(5)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .add(MI.getOperand(5)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; } @@ -1187,10 +1189,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, "bits set."); unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(bicOpc), ARM::R6) - .addReg(ARM::R6, RegState::Kill) - .addImm(MaxAlign-1))); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) + .addReg(ARM::R6, RegState::Kill) + .addImm(MaxAlign - 1) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } } @@ -1201,24 +1204,25 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { // These are just fancy MOVs instructions. - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? - ARM_AM::lsr : ARM_AM::asr), - 1))) - .addReg(ARM::CPSR, RegState::Define); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc( + (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); return true; } case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) - .addReg(0); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; @@ -1241,18 +1245,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); if (!Thumb) MIB.addImm(0); - MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.add(predOps(ARMCC::AL)); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Thumb ? ARM::tBLXr : ARM::BLX)); if (Thumb) - MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.add(predOps(ARMCC::AL)); MIB.addReg(Reg, RegState::Kill); } else { MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Thumb ? 
ARM::tBL : ARM::BL)); if (Thumb) - MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.add(predOps(ARMCC::AL)); MIB.addExternalSymbol("__aeabi_read_tp", 0); } @@ -1268,15 +1272,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg) - .addOperand(MI.getOperand(1))); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) + .add(MI.getOperand(1)) + .add(predOps(ARMCC::AL)); MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::tPICADD)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)); + MachineInstrBuilder MIB2 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .add(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); return true; @@ -1319,7 +1323,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); if (IsARM) MIB.addImm(0); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (IsPIC) { MachineInstrBuilder MIB = @@ -1329,7 +1333,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(ARMPCLabelIndex); if (IsARM) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } MI.eraseFromParent(); @@ -1368,7 +1372,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg).addImm(LabelId); if (isARM) { - AddDefaultPred(MIB3); + MIB3.add(predOps(ARMCC::AL)); if (Opcode == ARM::MOV_ga_pcrel_ldr) MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } @@ -1388,9 +1392,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) .addReg(ARM::LR) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) .addReg(ARM::CPSR, RegState::Undef); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -1407,11 +1411,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned DstReg = MI.getOperand(OpIdx++).getReg(); // Copy the source register. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Add the destination operands (D subregs). unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); @@ -1438,11 +1442,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Add the source operands (D subregs). 
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index df4dcb3..bf00ef6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" @@ -21,30 +22,61 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstdint> +#include <utility> + using namespace llvm; namespace { @@ -54,24 +86,22 @@ namespace { enum { RegBase, FrameIndexBase - } BaseType; + } BaseType = RegBase; union { unsigned Reg; int FI; } Base; - int Offset; + int Offset = 0; // Innocuous defaults for our address. - Address() - : BaseType(RegBase), Offset(0) { - Base.Reg = 0; - } + Address() { + Base.Reg = 0; + } } Address; class ARMFastISel final : public FastISel { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; @@ -99,8 +129,9 @@ class ARMFastISel final : public FastISel { Context = &funcInfo.Fn->getContext(); } - // Code from FastISel.cpp. private: + // Code from FastISel.cpp. 
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); @@ -117,18 +148,18 @@ class ARMFastISel final : public FastISel { uint64_t Imm); // Backend specific FastISel code. - private: + bool fastSelectInstruction(const Instruction *I) override; unsigned fastMaterializeConstant(const Constant *C) override; unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; bool fastLowerArguments() override; - private: + #include "ARMGenFastISel.inc" // Instruction selection routines. - private: + bool SelectLoad(const Instruction *I); bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); @@ -151,12 +182,12 @@ class ARMFastISel final : public FastISel { bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); // Utility routines. - private: + bool isPositionIndependent() const; bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt); + bool isZExt, bool isEquality); bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); @@ -179,7 +210,7 @@ class ARMFastISel final : public FastISel { const TargetLowering *getTargetLowering() { return &TLI; } // Call handling routines. - private: + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return, bool isVarArg); @@ -198,7 +229,7 @@ class ARMFastISel final : public FastISel { bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); // OptionalDef handling routines. - private: + bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); @@ -219,8 +250,7 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { return false; // Look to see if our OptionalDef is defining CPSR or CCR. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; if (MO.getReg() == ARM::CPSR) *CPSR = true; @@ -236,8 +266,8 @@ bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { AFI->isThumb2Function()) return MI->isPredicable(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) - if (MCID.OpInfo[i].isPredicate()) + for (const MCOperandInfo &opInfo : MCID.operands()) + if (opInfo.isPredicate()) return true; return false; @@ -256,17 +286,13 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { // Are we NEON in ARM mode and have a predicate operand? If so, I know // we're not predicable but add it anyways. if (isARMNEONPred(MI)) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // Do we optionally set a predicate? Preds is size > 0 iff the predicate // defines CPSR. All other OptionalDefines in ARM are the CCR register. bool CPSR = false; - if (DefinesOptionalPredicate(MI, &CPSR)) { - if (CPSR) - AddDefaultT1CC(MIB); - else - AddDefaultCC(MIB); - } + if (DefinesOptionalPredicate(MI, &CPSR)) + MIB.add(CPSR ? 
t1CondCodeOp() : condCodeOp()); return MIB; } @@ -434,7 +460,6 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { } unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { - if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; @@ -739,7 +764,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); - for (;;) { + while (true) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; @@ -971,7 +996,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // Create the base instruction, then add the operands. if (allocReg) ResultReg = createResultReg(RC); - assert (ResultReg > 255 && "Expected an allocated virtual register."); + assert(ResultReg > 255 && "Expected an allocated virtual register."); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); @@ -1216,7 +1241,6 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // behavior. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - // Get the compare predicate. // Try to take advantage of fallthrough opportunities. CmpInst::Predicate Predicate = CI->getPredicate(); @@ -1231,7 +1255,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CI->isEquality())) return false; unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; @@ -1318,14 +1343,16 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt) { + bool isZExt, bool isEquality) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); - bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); - if (isFloat && !Subtarget->hasVFP2()) + if (Ty->isFloatTy() && !Subtarget->hasVFP2()) + return false; + + if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP())) return false; // Check to see if the 2nd operand is a constant that we can encode directly @@ -1364,10 +1391,18 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; + // Equality comparisons shouldn't raise Invalid on uordered inputs. + if (isEquality) + CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; + else + CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: isICmp = false; + // Equality comparisons shouldn't raise Invalid on uordered inputs. + if (isEquality) + CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; + else CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; break; case MVT::i1: @@ -1444,7 +1479,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. 
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CI->isEquality())) return false; // Now set a register based on the comparison. Explicitly set the predicates @@ -1466,7 +1502,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { bool ARMFastISel::SelectFPExt(const Instruction *I) { // Make sure we have VFP and that we're extending float to double. - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false; Value *V = I->getOperand(0); if (!I->getType()->isDoubleTy() || @@ -1485,7 +1521,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { bool ARMFastISel::SelectFPTrunc(const Instruction *I) { // Make sure we have VFP and that we're truncating double to float. - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false; Value *V = I->getOperand(0); if (!(I->getType()->isFloatTy() && @@ -1536,7 +1572,8 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { unsigned Opc; if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS; - else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; + else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP()) + Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); @@ -1561,7 +1598,8 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { unsigned Opc; Type *OpTy = I->getOperand(0)->getType(); if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS; - else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; + else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP()) + Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; else return false; // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. @@ -1596,7 +1634,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { bool UseImm = false; bool isNegativeImm = false; if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { - assert (VT == MVT::i32 && "Expecting an i32."); + assert(VT == MVT::i32 && "Expecting an i32."); Imm = (int)ConstInt->getValue().getZExtValue(); if (Imm < 0) { isNegativeImm = true; @@ -1663,7 +1701,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { // If we have integer div support we should have selected this automagically. // In case we have a real miss go ahead and return false and we'll pick // it up later. - if (Subtarget->hasDivide()) return false; + if (Subtarget->hasDivideInThumbMode()) + return false; // Otherwise emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1765,8 +1804,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { // if we have them. // FIXME: It'd be nice to use NEON instructions. Type *Ty = I->getType(); - bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); - if (isFloat && !Subtarget->hasVFP2()) + if (Ty->isFloatTy() && !Subtarget->hasVFP2()) + return false; + if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP())) return false; unsigned Opc; @@ -1908,7 +1948,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes)); + .addImm(NumBytes).addImm(0)); // Process the args. 
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -1926,16 +1966,16 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); - assert (Arg != 0 && "Failed to emit a sext"); + assert(Arg != 0 && "Failed to emit a sext"); ArgVT = DestVT; break; } case CCValAssign::AExt: - // Intentional fall-through. Handle AExt and ZExt. + // Intentional fall-through. Handle AExt and ZExt. case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); - assert (Arg != 0 && "Failed to emit a zext"); + assert(Arg != 0 && "Failed to emit a zext"); ArgVT = DestVT; break; } @@ -1960,6 +2000,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, assert(VA.getLocVT() == MVT::f64 && "Custom lowering for v2f64 args not available"); + // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size() CCValAssign &NextVA = ArgLocs[++i]; assert(VA.isRegLoc() && NextVA.isRegLoc() && @@ -2131,8 +2172,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc)); AddOptionalDefs(MIB); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned R : RetRegs) + MIB.addReg(R, RegState::Implicit); return true; } @@ -2192,8 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { ArgRegs.reserve(I->getNumOperands()); ArgVTs.reserve(I->getNumOperands()); ArgFlags.reserve(I->getNumOperands()); - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value *Op = I->getOperand(i); + for (Value *Op : I->operands()) { unsigned Arg = getRegForValue(Op); if (Arg == 0) return false; @@ -2230,15 +2270,15 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { DbgLoc, TII.get(CallOpc)); // BL / BLX don't take a predicate, but tBL / tBLX do. if (isThumb2) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (Subtarget->genLongCalls()) MIB.addReg(CalleeReg); else MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); + for (unsigned R : RegArgs) + MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2311,19 +2351,19 @@ bool ARMFastISel::SelectCall(const Instruction *I, break; ISD::ArgFlagsTy Flags; - unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + unsigned ArgIdx = i - CS.arg_begin(); + if (CS.paramHasAttr(ArgIdx, Attribute::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + if (CS.paramHasAttr(ArgIdx, Attribute::ZExt)) Flags.setZExt(); // FIXME: Only handle *easy* calls for now. 
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || - CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) || - CS.paramHasAttr(AttrInd, Attribute::SwiftError) || - CS.paramHasAttr(AttrInd, Attribute::Nest) || - CS.paramHasAttr(AttrInd, Attribute::ByVal)) + if (CS.paramHasAttr(ArgIdx, Attribute::InReg) || + CS.paramHasAttr(ArgIdx, Attribute::StructRet) || + CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) || + CS.paramHasAttr(ArgIdx, Attribute::SwiftError) || + CS.paramHasAttr(ArgIdx, Attribute::Nest) || + CS.paramHasAttr(ArgIdx, Attribute::ByVal)) return false; Type *ArgTy = (*i)->getType(); @@ -2373,7 +2413,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // ARM calls don't take a predicate, but tBL / tBLX do. if(isThumb2) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) @@ -2382,8 +2422,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); + for (unsigned R : RegArgs) + MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2418,7 +2458,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, else if (Len >= 2) VT = MVT::i16; else { - assert (Len == 1 && "Expected a length of 1!"); + assert(Len == 1 && "Expected a length of 1!"); VT = MVT::i8; } } else { @@ -2433,9 +2473,9 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, bool RV; unsigned ResultReg; RV = ARMEmitLoad(VT, ResultReg, Src); - assert (RV == true && "Should be able to handle this load."); + assert(RV && "Should be able to handle this load."); RV = ARMEmitStore(VT, ResultReg, Dest); - assert (RV == true && "Should be able to handle this store."); + assert(RV && "Should be able to handle this store."); (void)RV; unsigned Size = VT.getSizeInBits()/8; @@ -2687,9 +2727,11 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, if (setsCPSR) MIB.addReg(ARM::CPSR, RegState::Define); SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR); - AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); + MIB.addReg(SrcReg, isKill * RegState::Kill) + .addImm(ImmEnc) + .add(predOps(ARMCC::AL)); if (hasS) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); // Second instruction consumes the first's result. SrcReg = ResultReg; } @@ -2779,7 +2821,6 @@ bool ARMFastISel::SelectShift(const Instruction *I, // TODO: SoftFP support. bool ARMFastISel::fastSelectInstruction(const Instruction *I) { - switch (I->getOpcode()) { case Instruction::Load: return SelectLoad(I); @@ -2849,6 +2890,7 @@ bool ARMFastISel::fastSelectInstruction(const Instruction *I) { } namespace { + // This table describes sign- and zero-extend instructions which can be // folded into a preceding load. All of these extends have an immediate // (sometimes a mask and sometimes a shift) that's applied after @@ -2865,7 +2907,8 @@ const struct FoldableLoadExtendsStruct { { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 }, { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 } }; -} + +} // end anonymous namespace /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. 
If possible, @@ -2888,13 +2931,12 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, bool Found = false; bool isZExt; - for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends); - i != e; ++i) { - if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() && - (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm && - MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) { + for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) { + if (FLE.Opc[isThumb2] == MI->getOpcode() && + (uint64_t)FLE.ExpectedImm == Imm && + MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) { Found = true; - isZExt = FoldableLoadExtends[i].isZExt; + isZExt = FLE.isZExt; } } if (!Found) return false; @@ -2933,7 +2975,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, .addConstantPoolIndex(Idx); if (Opc == ARM::LDRcp) MIB.addImm(0); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // Fix the address by adding pc. unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -2944,7 +2986,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, .addReg(TempReg) .addImm(ARMPCLabelIndex); if (!Subtarget->isThumb()) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (UseGOT_PREL && Subtarget->isThumb()) { unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -2981,20 +3023,18 @@ bool ARMFastISel::fastLowerArguments() { // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments // which are passed in r0 - r3. - unsigned Idx = 1; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - if (Idx > 4) + for (const Argument &Arg : F->args()) { + if (Arg.getArgNo() >= 4) return false; - if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::ByVal)) return false; - Type *ArgTy = I->getType(); + Type *ArgTy = Arg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; @@ -3010,16 +3050,14 @@ bool ARMFastISel::fastLowerArguments() { } } - static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; const TargetRegisterClass *RC = &ARM::rGPRRegClass; - Idx = 0; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - unsigned SrcReg = GPRArgRegs[Idx]; + for (const Argument &Arg : F->args()) { + unsigned ArgNo = Arg.getArgNo(); + unsigned SrcReg = GPRArgRegs[ArgNo]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 
// Without this, EmitLiveInCopies may eliminate the livein if its only @@ -3028,13 +3066,14 @@ bool ARMFastISel::fastLowerArguments() { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); - updateValueMap(&*I, ResultReg); + updateValueMap(&Arg, ResultReg); } return true; } namespace llvm { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel()) @@ -3042,4 +3081,5 @@ namespace llvm { return nullptr; } -} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h index 0c910ab..8c0df4c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFeatures.h +++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h @@ -19,10 +19,10 @@ namespace llvm { template<typename InstrType> // could be MachineInstr or MCInst -bool IsCPSRDead(InstrType *Instr); +bool IsCPSRDead(const InstrType *Instr); template<typename InstrType> // could be MachineInstr or MCInst -inline bool isV8EligibleForIT(InstrType *Instr) { +inline bool isV8EligibleForIT(const InstrType *Instr) { switch (Instr->getOpcode()) { default: return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index c72db8a..16b54e8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -16,19 +16,49 @@ #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <utility> +#include <vector> #define DEBUG_TYPE "arm-frame-lowering" @@ -180,6 +210,7 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF, } namespace { + struct StackAdjustingInsts { struct InstInfo { MachineBasicBlock::iterator I; @@ -196,7 +227,8 @@ struct StackAdjustingInsts { } void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { - auto Info = 
find_if(Insts, [&](InstInfo &Info) { return Info.I == I; }); + auto Info = + llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; }); assert(Info != Insts.end() && "invalid sp adjusting instruction"); Info->SPAdjust += ExtraBytes; } @@ -219,7 +251,8 @@ struct StackAdjustingInsts { } } }; -} + +} // end anonymous namespace /// Emit an instruction sequence that will align the address in /// register Reg by zero-ing out the lower bits. For versions of the @@ -252,38 +285,55 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, // lsr Reg, Reg, log2(Alignment) // lsl Reg, Reg, log2(Alignment) if (CanUseBFC) { - AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) - .addReg(Reg, RegState::Kill) - .addImm(~AlignMask)); + BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask) + .add(predOps(ARMCC::AL)); } else if (AlignMask <= 255) { - AddDefaultCC( - AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) - .addReg(Reg, RegState::Kill) - .addImm(AlignMask))); + BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } else { assert(!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC."); - AddDefaultCC(AddDefaultPred( - BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) - .addReg(Reg, RegState::Kill) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); - AddDefaultCC(AddDefaultPred( - BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) - .addReg(Reg, RegState::Kill) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } } else { // Since this is only reached for Thumb-2 targets, the BFC instruction // should always be available. assert(CanUseBFC); - AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) - .addReg(Reg, RegState::Kill) - .addImm(~AlignMask)); + BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask) + .add(predOps(ARMCC::AL)); } } +/// We need the offset of the frame pointer relative to other MachineFrameInfo +/// offsets which are encoded relative to SP at function begin. +/// See also emitPrologue() for how the FP is set up. +/// Unfortunately we cannot determine this value in determineCalleeSaves() yet +/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use +/// this to produce a conservative estimate that we check in an assert() later. +static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) { + // This is a conservative estimation: Assume the frame pointer being r7 and + // pc("r15") up to r8 getting spilled before (= 8 registers). 
+ return -AFI.getArgRegsSaveSize() - (8 * 4); +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -394,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = - MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; + int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); + assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + "Max FP estimation is wrong"); + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -448,9 +500,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, uint32_t NumWords = NumBytes >> 2; if (NumWords < 65536) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); else BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) .addImm(NumWords) @@ -462,10 +515,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case CodeModel::Default: case CodeModel::Kernel: BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__chkstk") - .addReg(ARM::R4, RegState::Implicit) - .setMIFlags(MachineInstr::FrameSetup); + .add(predOps(ARMCC::AL)) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); break; case CodeModel::Large: case CodeModel::JITDefault: @@ -474,18 +527,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlags(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::R12, RegState::Kill) - .addReg(ARM::R4, RegState::Implicit) - .setMIFlags(MachineInstr::FrameSetup); + .add(predOps(ARMCC::AL)) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); break; } - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), - ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addReg(ARM::R4, RegState::Kill) - .setMIFlags(MachineInstr::FrameSetup))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); NumBytes = 0; } @@ -657,12 +711,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill) + .add(predOps(ARMCC::AL)); emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, false); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill) + .add(predOps(ARMCC::AL)); } AFI->setShouldRestoreSPFromFP(true); @@ -675,14 +731,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // FIXME: Clarify FrameSetup flags here. 
if (RegInfo->hasBasePointer(MF)) { if (isARM) - BuildMI(MBB, MBBI, dl, - TII.get(ARM::MOVr), RegInfo->getBaseRegister()) - .addReg(ARM::SP) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - RegInfo->getBaseRegister()) - .addReg(ARM::SP)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); } // If the frame has variable sized objects then the epilogue must restore @@ -757,19 +813,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(ARM::R4)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); } } else { // Thumb2 or ARM. if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + .addReg(FramePtr) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(FramePtr)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(FramePtr) + .add(predOps(ARMCC::AL)); } } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) @@ -829,7 +887,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. if (RegInfo->needsStackRealignment(MF)) { - assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + assert(hasFP(MF) && "dynamic stack realignment without a FP!"); if (isFixed) { FrameReg = RegInfo->getFrameRegister(MF); Offset = FPOffset; @@ -910,8 +968,9 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - bool isLiveIn = MF.getRegInfo().isLiveIn(Reg); - if (!isLiveIn) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + bool isLiveIn = MRI.isLiveIn(Reg); + if (!isLiveIn && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); // If NoGap is true, push consecutive registers and then leave the rest // for other instructions. e.g. 
@@ -936,18 +995,19 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, }); if (Regs.size() > 1 || StrOpc== 0) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) - .addReg(ARM::SP).setMIFlags(MIFlags)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) + .addReg(ARM::SP) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), - ARM::SP) - .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP).setMIFlags(MIFlags) - .addImm(-4); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) + .addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(ARM::SP) + .setMIFlags(MIFlags) + .addImm(-4) + .add(predOps(ARMCC::AL)); } Regs.clear(); @@ -1027,9 +1087,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, }); if (Regs.size() > 1 || LdrOpc == 0) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) - .addReg(ARM::SP)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet && MI != MBB.end()) { @@ -1053,7 +1113,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else MIB.addImm(4); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } Regs.clear(); @@ -1114,9 +1174,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // sub r4, sp, #numregs * 8 // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); + BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment(); // We must set parameter MustBeSingleInstruction to true, since @@ -1132,10 +1194,10 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // Leave r4 live, it is used below. Opc = isThumb ? ARM::tMOVr : ARM::MOVr; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) - .addReg(ARM::R4); - MIB = AddDefaultPred(MIB); + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); if (!isThumb) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); // Now spill NumAlignedDPRCS2Regs registers starting from d8. // r4 holds the stack slot address. 
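The change running through these ARMFrameLowering hunks is the same one applied across the whole import: AddDefaultPred/AddDefaultCC wrappers are replaced by trailing MachineInstrBuilder::add() calls. As a reading aid, here is a minimal sketch of the helpers involved, paraphrased from ARMBaseInstrInfo.h as of this revision (the names are real; the comments and includes below are reconstructed, not copied):

#include "MCTargetDesc/ARMBaseInfo.h"    // ARMCC::CondCodes, ARM::CPSR
#include "llvm/CodeGen/MachineOperand.h"
#include <array>

// Trailing predicate operands: condition-code immediate plus predicate
// register (0, i.e. no register, for an unconditional ARMCC::AL).
static inline std::array<llvm::MachineOperand, 2>
predOps(ARMCC::CondCodes Pred, unsigned PredReg = 0) {
  return {{llvm::MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
           llvm::MachineOperand::CreateReg(PredReg, /*isDef=*/false)}};
}

// Optional 's'-bit operand for ARM/Thumb2; register 0 means "do not set
// flags", while passing ARM::CPSR requests the flag-setting form.
static inline llvm::MachineOperand condCodeOp(unsigned CCReg = 0) {
  return llvm::MachineOperand::CreateReg(CCReg, /*isDef=*/false);
}

// Thumb1 ALU instructions always define CPSR, so this helper builds a def.
static inline llvm::MachineOperand t1CondCodeOp(bool isDead = false) {
  return llvm::MachineOperand::CreateReg(ARM::CPSR, /*isDef=*/true,
                                         /*isImp=*/false, /*isKill=*/false,
                                         isDead);
}

MachineInstrBuilder::add() accepts either a single MachineOperand or a range of them, which is why one .add(predOps(ARMCC::AL)) appends both predicate operands.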
@@ -1147,11 +1209,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), - ARM::R4) - .addReg(ARM::R4, RegState::Kill).addImm(16) - .addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1165,9 +1228,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) - .addReg(ARM::R4).addImm(16).addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) + .addReg(ARM::R4) + .addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1177,8 +1243,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) - .addReg(ARM::R4).addImm(16).addReg(SupReg)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) + .addReg(ARM::R4) + .addImm(16) + .addReg(SupReg) + .add(predOps(ARMCC::AL)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } @@ -1187,9 +1256,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs) { MBB.addLiveIn(NextReg); // vstr.64 uses addrmode5 which has an offset scale of 4. - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) - .addReg(NextReg) - .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); + BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) + .addReg(NextReg) + .addReg(ARM::R4) + .addImm((NextReg - R4BaseReg) * 2) + .add(predOps(ARMCC::AL)); } // The last spill instruction inserted should kill the scratch register r4. @@ -1254,8 +1325,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addFrameIndex(D8SpillFI).addImm(0))); + BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addFrameIndex(D8SpillFI) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); // Now restore NumAlignedDPRCS2Regs registers starting from d8. 
unsigned NextReg = ARM::D8; @@ -1264,10 +1338,12 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 6) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) - .addReg(ARM::R4, RegState::Define) - .addReg(ARM::R4, RegState::Kill).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) + .addReg(ARM::R4, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .addImm(16) + .addReg(SupReg, RegState::ImplicitDefine) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1280,9 +1356,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 4) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) - .addReg(ARM::R4).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) + .addReg(ARM::R4) + .addImm(16) + .addReg(SupReg, RegState::ImplicitDefine) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1291,16 +1369,20 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 2) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) - .addReg(ARM::R4).addImm(16)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) + .addReg(ARM::R4) + .addImm(16) + .add(predOps(ARMCC::AL)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } // Finally, use a vanilla vldr.64 for the remaining odd register. if (NumAlignedDPRCS2Regs) - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) - .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); + BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) + .addReg(ARM::R4) + .addImm(2 * (NextReg - R4BaseReg)) + .add(predOps(ARMCC::AL)); // Last store kills r4. std::prev(MI)->addRegisterKilled(ARM::R4, TRI); @@ -1633,24 +1715,38 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // worth the effort and added fragility? unsigned EstimatedStackSize = MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); - if (hasFP(MF)) { + + // Determine biggest (positive) SP offset in MachineFrameInfo. + int MaxFixedOffset = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I); + MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset); + } + + bool HasFP = hasFP(MF); + if (HasFP) { if (AFI->hasStackFrame()) EstimatedStackSize += 4; } else { // If FP is not used, SP will be used to access arguments, so count the // size of arguments into the estimation. - EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize(); + EstimatedStackSize += MaxFixedOffset; } EstimatedStackSize += 16; // For possible paddings. 
- bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) || - MFI.hasVarSizedObjects() || - (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)); + unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || + MFI.hasVarSizedObjects() || + (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || + // For large argument stacks fp relative addressing may overflow. + (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit); bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + if (BigFrameOffsets || + !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); - if (hasFP(MF)) { + if (HasFP) { SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. @@ -1658,11 +1754,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; - auto LRPos = find(UnspilledCS1GPRs, ARM::LR); + auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR); if (LRPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(LRPos); } - auto FPPos = find(UnspilledCS1GPRs, FramePtr); + auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr); if (FPPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(FPPos); NumGPRSpills++; @@ -1721,7 +1817,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } // r7 can be used if it is not being used as the frame pointer. - if (!hasFP(MF)) { + if (!HasFP) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " @@ -1773,7 +1869,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, NumGPRSpills++; CS1Spilled = true; ExtraCSSpill = true; - UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg)); + UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg)); if (Reg == ARM::LR) LRSpilled = true; } @@ -1786,7 +1882,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl<unsigned>::iterator LRPos; - LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR); + LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR); if (LRPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(LRPos); @@ -1831,7 +1927,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { + if (BigFrameOffsets && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; @@ -1865,10 +1961,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. 
assert(RS && "Register scavenging not provided"); - const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &RC = ARM::GPRRegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } } @@ -1890,7 +1986,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -1908,14 +2004,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm(); + unsigned PredReg = TII.getFramePred(Old); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old.getOperand(2).getReg(); emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, Pred, PredReg); } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old.getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, Pred, PredReg); @@ -2081,12 +2174,17 @@ void ARMFrameLowering::adjustForSegmentedStacks( // SR1: Scratch Register #1 // push {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) - .addReg(ScratchReg0).addReg(ScratchReg1); + BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) - .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) - .addReg(ScratchReg0).addReg(ScratchReg1); + BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Emit the relevant DWARF information about the change in stack pointer as @@ -2106,21 +2204,29 @@ void ARMFrameLowering::adjustForSegmentedStacks( // mov SR1, sp if (Thumb) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) - .addReg(ARM::SP)); + BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); } else if (CompareStackPointer) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) - .addReg(ARM::SP)).addReg(0); + BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // sub SR1, sp, #StackSize if (!CompareStackPointer && Thumb) { - AddDefaultPred( - AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) - .addReg(ScratchReg1).addImm(AlignedStackSize)); + BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); } else if (!CompareStackPointer) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) - .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); + BuildMI(McrMBB, DL, TII.get(ARM::SUBri), 
ScratchReg1) + .addReg(ARM::SP) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } if (Thumb && ST->isThumb1Only()) { @@ -2131,21 +2237,25 @@ void ARMFrameLowering::adjustForSegmentedStacks( unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); // ldr SR0, [pc, offset(STACK_LIMIT)] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) - .addConstantPoolIndex(CPI)); + BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) + .addConstantPoolIndex(CPI) + .add(predOps(ARMCC::AL)); // ldr SR0, [SR0] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) - .addReg(ScratchReg0).addImm(0)); + BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) + .addReg(ScratchReg0) + .addImm(0) + .add(predOps(ARMCC::AL)); } else { // Get TLS base address from the coprocessor // mrc p15, #0, SR0, c13, c0, #3 - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) - .addImm(15) - .addImm(0) - .addImm(13) - .addImm(0) - .addImm(3)); + BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) + .addImm(15) + .addImm(0) + .addImm(13) + .addImm(0) + .addImm(3) + .add(predOps(ARMCC::AL)); // Use the last tls slot on android and a private field of the TCP on linux. assert(ST->isTargetAndroid() || ST->isTargetLinux()); @@ -2153,16 +2263,19 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Get the stack limit from the right offset // ldr SR0, [sr0, #4 * TlsOffset] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) - .addReg(ScratchReg0).addImm(4 * TlsOffset)); + BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) + .addReg(ScratchReg0) + .addImm(4 * TlsOffset) + .add(predOps(ARMCC::AL)); } // Compare stack limit with stack size requested. // cmp SR0, SR1 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) - .addReg(ScratchReg0) - .addReg(ScratchReg1)); + BuildMI(GetMBB, DL, TII.get(Opcode)) + .addReg(ScratchReg0) + .addReg(ScratchReg1) + .add(predOps(ARMCC::AL)); // This jump is taken if StackLimit < SP - stack required. Opcode = Thumb ? ARM::tBcc : ARM::Bcc; @@ -2178,32 +2291,40 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Pass first argument for the __morestack by Scratch Register #0. // The amount size of stack required if (Thumb) { - AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), - ScratchReg0)).addImm(AlignedStackSize)); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) + .add(condCodeOp()) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) - .addImm(AlignedStackSize)).addReg(0); + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // Pass second argument for the __morestack by Scratch Register #1. // The amount size of stack consumed to save function arguments. 
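The mechanical change running through these adjustForSegmentedStacks hunks (and the earlier A15SDOptimizer ones) is the retirement of the AddDefaultPred/AddDefaultCC wrappers in favour of chaining .add(predOps(...)) and .add(condCodeOp()) directly on the builder. A minimal sketch of what the two helpers produce, assuming they match their ARMBaseInstrInfo.h definitions:

    #include "MCTargetDesc/ARMBaseInfo.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include <array>
    using namespace llvm;

    // predOps() yields the two trailing operands every predicable ARM
    // instruction carries: the condition-code immediate and the predicate
    // register (register 0 for an unconditional "always").
    static inline std::array<MachineOperand, 2>
    predOps(ARMCC::CondCodes Pred, unsigned PredReg = 0) {
      return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
               MachineOperand::CreateReg(PredReg, /*isDef=*/false)}};
    }

    // condCodeOp() yields the optional CPSR operand of flag-setting ("s")
    // instructions; register 0 means the flags are not written.
    static inline MachineOperand condCodeOp(unsigned CCReg = 0) {
      return MachineOperand::CreateReg(CCReg, /*isDef=*/false);
    }

MachineInstrBuilder::add() accepts a single MachineOperand as well as an array of them, so operands now appear at the call site in encoding order instead of being split between a wrapper call and a trailing .addReg(0).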
if (Thumb) { - AddDefaultPred( - AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) + .add(condCodeOp()) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) - .addReg(0); + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // push {lr} - Save return address of this function. if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) + BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) .addReg(ARM::LR); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) + BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) .addReg(ARM::LR); } @@ -2220,7 +2341,8 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Call __morestack(). if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) + BuildMI(AllocMBB, DL, TII.get(ARM::tBL)) + .add(predOps(ARMCC::AL)) .addExternalSymbol("__morestack"); } else { BuildMI(AllocMBB, DL, TII.get(ARM::BL)) @@ -2230,22 +2352,26 @@ void ARMFrameLowering::adjustForSegmentedStacks( // pop {lr} - Restore return address of this original function. if (Thumb) { if (ST->isThumb1Only()) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0); - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) - .addReg(ScratchReg0)); + BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) - .addReg(ARM::LR, RegState::Define) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP) - .addImm(4)); + BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(4) + .add(predOps(ARMCC::AL)); } } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ARM::LR); + BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::LR); } // Restore SR0 and SR1 in case of __morestack() was called. @@ -2253,15 +2379,17 @@ void ARMFrameLowering::adjustForSegmentedStacks( // scratch registers from here. 
// pop {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Update the CFA offset now that we've popped @@ -2271,20 +2399,22 @@ void ARMFrameLowering::adjustForSegmentedStacks( // bx lr - Return from this function. Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); + BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL)); // Restore SR0 and SR1 in case of __morestack() was not called. // pop {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Update the CFA offset now that we've popped diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7572955..f75dd4d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -228,11 +228,6 @@ private: const uint16_t *DOpcodes, const uint16_t *QOpcodes = nullptr); - /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, - /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be - /// generated to force the table registers to be consecutive. - void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); - /// Try to select SBFX/UBFX instructions for ARM. bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -244,11 +239,8 @@ private: bool tryInlineAsm(SDNode *N); - void SelectConcatVector(SDNode *N); void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); - bool trySMLAWSMULW(SDNode *N); - void SelectCMP_SWAP(SDNode *N); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for @@ -547,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { HandleSDNode Handle(N); + SDLoc Loc(N); replaceDAGValue(N.getOperand(1), NewMulConst); BaseReg = Handle.getValue(); - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, - PowerOfTwo), - SDLoc(N), MVT::i32); + Opc = CurDAG->getTargetConstant( + ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); return true; } } @@ -1866,6 +1858,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { return Opc; // If not one we handle, return it unchanged. } +/// Returns true if the given increment is a Constant known to be equal to the +/// access size performed by a NEON load/store. This means the "[rN]!" form can +/// be used. 
+static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { + auto C = dyn_cast<ConstantSDNode>(Inc); + return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; +} + void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, const uint16_t *QOpcodes0, @@ -1933,13 +1933,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if ((NumVecs <= 2) && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || - !isa<ConstantSDNode>(Inc.getNode())) - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2087,11 +2087,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if (!isa<ConstantSDNode>(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); else if (NumVecs > 2 && !isVSTfixed(Opc)) Ops.push_back(Reg0); @@ -2221,7 +2222,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } SDValue SuperReg; @@ -2325,9 +2328,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!isa<ConstantSDNode>(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. else if (NumVecs > 2) @@ -2363,39 +2368,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, - unsigned Opc) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); - SDLoc dl(N); - EVT VT = N->getValueType(0); - unsigned FirstTblReg = IsExt ? 2 : 1; - - // Form a REG_SEQUENCE to force register allocation. 
- SDValue RegSeq; - SDValue V0 = N->getOperand(FirstTblReg + 0); - SDValue V1 = N->getOperand(FirstTblReg + 1); - if (NumVecs == 2) - RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - else { - SDValue V2 = N->getOperand(FirstTblReg + 2); - // If it's a vtbl3, form a quad D-register and leave the last part as - // an undef. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) - : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); - } - - SmallVector<SDValue, 6> Ops; - if (IsExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); - Ops.push_back(getAL(CurDAG, dl)); // predicate - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); -} - bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) return false; @@ -2563,141 +2535,6 @@ bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ return false; } -static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1, - bool Accumulate) { - // For SM*WB, we need to some form of sext. - // For SM*WT, we need to search for (sra X, 16) - // Src1 then gets set to X. - if ((SignExt.getOpcode() == ISD::SIGN_EXTEND || - SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG || - SignExt.getOpcode() == ISD::AssertSext) && - SignExt.getValueType() == MVT::i32) { - - *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; - Src1 = SignExt.getOperand(0); - return true; - } - - if (SignExt.getOpcode() != ISD::SRA) - return false; - - ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1)); - if (!SRASrc1 || SRASrc1->getZExtValue() != 16) - return false; - - SDValue Op0 = SignExt.getOperand(0); - - // The sign extend operand for SM*WB could be generated by a shl and ashr. - if (Op0.getOpcode() == ISD::SHL) { - SDValue SHL = Op0; - ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); - if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16) - return false; - - *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; - Src1 = Op0.getOperand(0); - return true; - } - *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT; - Src1 = SignExt.getOperand(0); - return true; -} - -static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0, - SDValue &Src1, bool Accumulate) { - // First we look for: - // (add (or (srl ?, 16), (shl ?, 16))) - if (OR.getOpcode() != ISD::OR) - return false; - - SDValue SRL = OR.getOperand(0); - SDValue SHL = OR.getOperand(1); - - if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { - SRL = OR.getOperand(1); - SHL = OR.getOperand(0); - if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) - return false; - } - - ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1)); - ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); - if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 || - SHLSrc1->getZExtValue() != 16) - return false; - - // The first operands to the shifts need to be the two results from the - // same smul_lohi node. 
- if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) || - SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI) - return false; - - SDNode *SMULLOHI = SRL.getOperand(0).getNode(); - if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) || - SHL.getOperand(0) != SDValue(SMULLOHI, 1)) - return false; - - // Now we have: - // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))) - // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments. - // For SMLAWB the 16-bit value will signed extended somehow. - // For SMLAWT only the SRA is required. - - // Check both sides of SMUL_LOHI - if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) { - Src0 = SMULLOHI->getOperand(1); - } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1, - Accumulate)) { - Src0 = SMULLOHI->getOperand(0); - } else { - return false; - } - return true; -} - -bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) { - if (!Subtarget->hasV6Ops() || - (Subtarget->isThumb() && !Subtarget->hasThumb2())) - return false; - - SDLoc dl(N); - SDValue Src0 = N->getOperand(0); - SDValue Src1 = N->getOperand(1); - SDValue A, B; - unsigned Opc = 0; - - if (N->getOpcode() == ISD::ADD) { - if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR) - return false; - - SDValue Acc; - if (SearchSignedMulLong(Src0, &Opc, A, B, true)) { - Acc = Src1; - } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) { - Acc = Src0; - } else { - return false; - } - if (Opc == 0) - return false; - - SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32) }; - CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops); - return true; - } else if (N->getOpcode() == ISD::OR && - SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) { - if (Opc == 0) - return false; - - SDValue Ops[] = { A, B, getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32)}; - CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); - return true; - } - return false; -} - /// We've got special pseudo-instructions for these void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { unsigned Opcode; @@ -2726,15 +2563,6 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { - // The only time a CONCAT_VECTORS operation can have legal types is when - // two 64-bit vectors are concatenated to a 128-bit vector. 
- EVT VT = N->getValueType(0); - if (!VT.is128BitVector() || N->getNumOperands() != 2) - llvm_unreachable("unexpected CONCAT_VECTORS"); - ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1))); -} - static Optional<std::pair<unsigned, unsigned>> getContiguousRangeOfSetBits(const APInt &A) { unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; @@ -2826,11 +2654,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; - case ISD::ADD: - case ISD::OR: - if (trySMLAWSMULW(N)) - return; - break; case ISD::WRITE_REGISTER: if (tryWriteRegister(N)) return; @@ -2859,9 +2682,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb()) { - SDValue Pred = getAL(CurDAG, dl); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; + SDValue Ops[] = { + CPIdx, + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, Ops); } else { @@ -2875,6 +2701,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) { ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, Ops); } + // Annotate the Node with memory operand information so that MachineInstr + // queries work properly. This e.g. gives the register allocation the + // required information for rematerialization. + MachineFunction& MF = CurDAG->getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + MemOp[0] = MF.getMachineMemOperand( + MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, 4, 4); + + cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1); + ReplaceNode(N, ResNode); return; } @@ -3046,49 +2883,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; } - case ARMISD::VMOVRRD: - ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, - N->getOperand(0), getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32))); - return; - case ISD::UMUL_LOHI: { - if (Subtarget->isThumb1Only()) - break; - if (Subtarget->isThumb()) { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode( - N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops)); - return; - } else { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode(N, CurDAG->getMachineNode( - Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl, - MVT::i32, MVT::i32, Ops)); - return; - } - } - case ISD::SMUL_LOHI: { - if (Subtarget->isThumb1Only()) - break; - if (Subtarget->isThumb()) { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode( - N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops)); - return; - } else { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode(N, CurDAG->getMachineNode( - Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl, - MVT::i32, MVT::i32, Ops)); - return; - } - } case ARMISD::UMAAL: { unsigned Opc = Subtarget->isThumb() ? 
ARM::t2UMAAL : ARM::UMAAL; SDValue Ops[] = { N->getOperand(0), N->getOperand(1), @@ -3099,38 +2893,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } case ARMISD::UMLAL:{ - // UMAAL is similar to UMLAL but it adds two 32-bit values to the - // 64-bit multiplication result. - if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && - N->getOperand(2).getOpcode() == ARMISD::ADDC && - N->getOperand(3).getOpcode() == ARMISD::ADDE) { - - SDValue Addc = N->getOperand(2); - SDValue Adde = N->getOperand(3); - - if (Adde.getOperand(2).getNode() == Addc.getNode()) { - - ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); - ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); - - if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) - { - // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm - // RdLo = one operand to be added, lower 32-bits of res - // RdHi = other operand to be added, upper 32-bits of res - // Rn = first multiply operand - // Rm = second multiply operand - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - Addc.getOperand(0), Addc.getOperand(1), - getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32) }; - unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; - CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); - return; - } - } - } - if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), @@ -3281,26 +3043,23 @@ void ARMDAGToDAGISel::Select(SDNode *N) { int64_t Addend = -C->getSExtValue(); SDNode *Add = nullptr; - // In T2 mode, ADDS can be better than CMN if the immediate fits in a + // ADDS can be better than CMN if the immediate fits in a // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. // Outside that range we can just use a CMN which is 32-bit but has a // 12-bit immediate range. - if (Subtarget->isThumb2() && Addend < 1<<8) { - SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); - } else if (!Subtarget->isThumb2() && Addend < 1<<8) { - // FIXME: Add T1 tADDi8 code. - SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, - CurDAG->getTargetConstant(Addend, dl, MVT::i32), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; - Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops); - } else if (!Subtarget->isThumb2() && Addend < 1<<3) { - SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, - CurDAG->getTargetConstant(Addend, dl, MVT::i32), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; - Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops); + if (Addend < 1<<8) { + if (Subtarget->isThumb2()) { + SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); + } else { + unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); + } } if (Add) { SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; @@ -3964,63 +3723,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - - case Intrinsic::arm_neon_vtbl2: - SelectVTBL(N, false, 2, ARM::VTBL2); - return; - case Intrinsic::arm_neon_vtbl3: - SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); - return; - case Intrinsic::arm_neon_vtbl4: - SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); - return; - - case Intrinsic::arm_neon_vtbx2: - SelectVTBL(N, true, 2, ARM::VTBX2); - return; - case Intrinsic::arm_neon_vtbx3: - SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); - return; - case Intrinsic::arm_neon_vtbx4: - SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); - return; - } - break; - } - - case ARMISD::VTBL1: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Ops[] = {N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); - return; - } - case ARMISD::VTBL2: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - SDValue V0 = N->getOperand(0); - SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - - SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); - return; - } - - case ISD::CONCAT_VECTORS: - SelectConcatVector(N); - return; - case ISD::ATOMIC_CMP_SWAP: SelectCMP_SWAP(N); return; @@ -4127,11 +3829,10 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { // The flags here are common to those allowed for apsr in the A class cores and // those allowed for the special registers in the M class cores. Returns a // value representing which flags were present, -1 if invalid. -static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) { - if (Flags.empty()) - return 0x2 | (int)hasDSP; - +static inline int getMClassFlagsMask(StringRef Flags) { return StringSwitch<int>(Flags) + .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is + // correct when flags are not permitted .Case("g", 0x1) .Case("nzcvq", 0x2) .Case("nzcvqg", 0x3) @@ -4174,7 +3875,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, } // We know we are now handling a write so need to get the mask for the flags. - int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP()); + int Mask = getMClassFlagsMask(Flags); // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values // shouldn't have flags present. @@ -4189,10 +3890,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, // The register was valid so need to put the mask in the correct place // (the flags need to be in bits 11-10) and combine with the SYSmvalue to // construct the operand for the instruction node. 
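For the getMClassRegisterMask change just below (dropping the SYSmvalue < 0x4 special case), a worked example of how the returned operand packs together, with a hypothetical helper name: in the ARMv7-M MSR/MRS encoding the flag bits sit in bits 11:10 above the 8-bit SYSm value, and with the new getMClassFlagsMask an empty flags string contributes 0x2 even for registers that take no flags.

    #include <cassert>

    // Hypothetical restatement of "return SYSmvalue | Mask << 10;".
    static int encodeMSRMaskOperand(int SYSm, int FlagsMask) {
      return SYSm | (FlagsMask << 10);
    }

    int main() {
      assert(encodeMSRMaskOperand(/*apsr*/ 0, /*nzcvq*/ 0x2) == 0x800);
      assert(encodeMSRMaskOperand(/*iapsr*/ 1, /*g*/ 0x1) == 0x401);
    }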
- if (SYSmvalue < 0x4) - return SYSmvalue | Mask << 10; - - return SYSmvalue; + return SYSmvalue | Mask << 10; } static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { @@ -4205,7 +3903,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { // The flags permitted for apsr are the same flags that are allowed in // M class registers. We get the flag value and then shift the flags into // the correct place to combine with the mask. - Mask = getMClassFlagsMask(Flags, true); + Mask = getMClassFlagsMask(Flags); if (Mask == -1) return -1; return Mask << 2; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0f84a23..27dda93 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13,46 +13,100 @@ //===----------------------------------------------------------------------===// #include "ARMISelLowering.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" +#include "ARMRegisterInfo.h" +#include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "ARMTargetMachine.h" -#include "ARMTargetObjectFile.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/MC/MCSectionMachO.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include 
"llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <string> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "arm-isel" @@ -72,7 +126,7 @@ static cl::opt<bool> EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), - cl::init(true)); + cl::init(false)); // FIXME: set to true by default once PR32780 is fixed static cl::opt<unsigned> ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), @@ -82,21 +136,6 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal( cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128)); -namespace { - class ARMCCState : public CCState { - public: - ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, - ParmContext PC) - : CCState(CC, isVarArg, MF, locs, C) { - assert(((PC == Call) || (PC == Prologue)) && - "ARMCCState users must specify whether their context is call" - "or prologue generation."); - CallOrPrologue = PC; - } - }; -} - // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 @@ -162,7 +201,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) - for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); } @@ -433,9 +472,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } // Use divmod compiler-rt calls for iOS 5.0 and later. 
- if (Subtarget->isTargetWatchOS() || - (Subtarget->isTargetIOS() && - !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { + if (Subtarget->isTargetMachO() && + !(Subtarget->isTargetIOS() && + Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } @@ -545,7 +584,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); @@ -563,7 +601,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); @@ -580,7 +617,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); @@ -685,10 +721,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } - // ARM and Thumb2 support UMLAL/SMLAL. - if (!Subtarget->isThumb1Only()) - setTargetDAGCombine(ISD::ADDC); - if (Subtarget->isFPOnlySP()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which @@ -707,7 +739,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOWI, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); @@ -786,14 +817,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - if (!Subtarget->isThumb1Only()) { - // FIXME: We should do this for Thumb1 as well. 
- setOperationAction(ISD::ADDC, MVT::i32, Custom); - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBC, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - } + setOperationAction(ISD::ADDC, MVT::i32, Custom); + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBC, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); @@ -820,7 +849,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. @@ -828,7 +857,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { + if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); @@ -1305,6 +1334,16 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; + case ARMISD::SMLALBB: return "ARMISD::SMLALBB"; + case ARMISD::SMLALBT: return "ARMISD::SMLALBT"; + case ARMISD::SMLALTB: return "ARMISD::SMLALTB"; + case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; + case ARMISD::SMULWB: return "ARMISD::SMULWB"; + case ARMISD::SMULWT: return "ARMISD::SMULWT"; + case ARMISD::SMLALD: return "ARMISD::SMLALD"; + case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; + case ARMISD::SMLSLD: return "ARMISD::SMLSLD"; + case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -1414,6 +1453,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Lowering Code //===----------------------------------------------------------------------===// +static bool isSRL16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRL) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +static bool isSRA16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRA) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +static bool isSHL16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SHL) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +// Check for a signed 16-bit value. We special case SRA because it makes it +// more simple when also looking for SRAs that aren't sign extending a +// smaller value. Without the check, we'd need to take extra care with +// checking order for some operations. 
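A standalone check of the comment's claim, before the definition that follows: on an i32, "is a signed 16-bit value" is the same as "has at least 17 sign bits", because a sign-extended i16 repeats its sign bit across bits 31..15. The sketch below mirrors what SelectionDAG::ComputeNumSignBits reports, and assumes arithmetic right shift of negative ints (guaranteed since C++20, universal in practice):

    #include <cassert>
    #include <cstdint>

    // Count leading bits equal to the sign bit, sign bit included.
    static unsigned numSignBits(int32_t V) {
      unsigned N = 1;
      while (N < 32 && (((V >> (31 - N)) ^ (V >> 31)) & 1) == 0)
        ++N;
      return N;
    }

    int main() {
      assert(numSignBits(-42) >= 17);    // fits in i16
      assert(numSignBits(0x12345) < 17); // needs 17 magnitude bits
    }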
+static bool isS16(const SDValue &Op, SelectionDAG &DAG) { + if (isSRA16(Op)) + return isSHL16(Op.getOperand(0)); + return DAG.ComputeNumSignBits(Op) == 17; +} + /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { @@ -1433,22 +1506,34 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, - ARMCC::CondCodes &CondCode2) { + ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { CondCode2 = ARMCC::AL; + InvalidOnQNaN = true; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: - case ISD::SETOEQ: CondCode = ARMCC::EQ; break; + case ISD::SETOEQ: + CondCode = ARMCC::EQ; + InvalidOnQNaN = false; + break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; - case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; + case ISD::SETONE: + CondCode = ARMCC::MI; + CondCode2 = ARMCC::GT; + InvalidOnQNaN = false; + break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; - case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; + case ISD::SETUEQ: + CondCode = ARMCC::EQ; + CondCode2 = ARMCC::VS; + InvalidOnQNaN = false; + break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: @@ -1456,7 +1541,10 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: - case ISD::SETUNE: CondCode = ARMCC::NE; break; + case ISD::SETUNE: + CondCode = ARMCC::NE; + InvalidOnQNaN = false; + break; } } @@ -1549,8 +1637,8 @@ SDValue ARMTargetLowering::LowerCallResult( // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, - *DAG.getContext(), Call); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); // Copy all of the result registers out of their specified physreg. @@ -1710,8 +1798,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), Call); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); // Get a count of how many bytes are to be pushed on the stack. @@ -1724,8 +1812,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(NumBytes, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); @@ -2088,10 +2175,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// this. 
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, unsigned Align) const { - assert((State->getCallOrPrologue() == Prologue || - State->getCallOrPrologue() == Call) && - "unhandled ParmContext"); - // Byval (as with any stack) slots are always at least 4 byte aligned. Align = std::max(Align, 4U); @@ -2148,7 +2231,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = INT_MAX; + int FI = std::numeric_limits<int>::max(); if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(VR)) @@ -2178,7 +2261,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, } else return false; - assert(FI != INT_MAX); + assert(FI != std::numeric_limits<int>::max()); if (!MFI.isFixedObjectIndex(FI)) return false; return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); @@ -2260,7 +2343,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); + CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as @@ -2362,8 +2445,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slots. - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, - *DAG.getContext(), Call); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); @@ -2550,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return true; } -bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; @@ -2586,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. -static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here SDLoc dl(Op); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); SDValue Res; + + // When generating execute-only code Constant Pools must be promoted to the + // global data section. It's a bit ugly that we can't share them across basic + // blocks, but this way we guarantee that execute-only behaves correct with + // position-independent addressing modes. + if (Subtarget->genExecuteOnly()) { + auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + auto T = const_cast<Type*>(CP->getType()); + auto C = const_cast<Constant*>(CP->getConstVal()); + auto M = const_cast<Module*>(DAG.getMachineFunction(). 
+ getFunction()->getParent()); + auto GV = new GlobalVariable( + *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, + Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + + Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + + Twine(AFI->createPICLabelUId()) + ); + SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), + dl, PtrVT); + return LowerGlobalAddress(GA, DAG); + } + if (CP->isMachineConstantPoolEntry()) Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment()); @@ -2790,9 +2896,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // FIXME: is there useful debug info available here? TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), - DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); + CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( + CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -2935,7 +3041,7 @@ static bool isSimpleType(Type *T) { } static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, - EVT PtrVT, SDLoc dl) { + EVT PtrVT, const SDLoc &dl) { // If we're creating a pool entry for a constant global with unnamed address, // and the global is small enough, we can emit it inline into the constant pool // to save ourselves an indirection. @@ -2980,7 +3086,8 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, unsigned RequiredPadding = 4 - (Size % 4); bool PaddingPossible = RequiredPadding == 4 || (CDAInit && CDAInit->isString()); - if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize) + if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize || + Size == 0) return SDValue(); unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); @@ -3034,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) { isa<Function>(GV); } +SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -3080,15 +3200,22 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, return Result; } else if (Subtarget->isRWPI() && !IsRO) { // SB-relative. 
- ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue G = DAG.getLoad( - PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + SDValue RelAddr; + if (Subtarget->useMovt(DAG.getMachineFunction())) { + ++NumMovwMovt; + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL); + RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G); + } else { // use literal pool for address constant + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + RelAddr = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + } SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); - SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G); + SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr); return Result; } @@ -3219,6 +3346,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } + case Intrinsic::arm_neon_vabs: + return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) @@ -3256,13 +3386,23 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vtbl1: + return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::arm_neon_vtbl2: + return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } } static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); + ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2)); + auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue()); + if (SSID == SyncScope::SingleThread) + return Op; + if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get @@ -3462,8 +3602,8 @@ SDValue ARMTargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), Prologue); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); SmallVector<SDValue, 16> ArgValues; @@ -3595,7 +3735,6 @@ SDValue ARMTargetLowering::LowerFormalArguments( InVals.push_back(ArgValue); } else { // VA.isRegLoc() - // sanity check assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); @@ -3734,13 +3873,15 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const SDLoc &dl) const { + SelectionDAG &DAG, const SDLoc &dl, + bool InvalidOnQNaN) const { assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; + SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } @@ -3757,10 +3898,12 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), + Cmp.getOperand(1), Cmp.getOperand(2)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), + Cmp.getOperand(1)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } @@ -3808,7 +3951,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, return std::make_pair(Value, OverflowCmp); } - SDValue ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. @@ -3832,7 +3974,6 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } - SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4025,7 +4166,6 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, // Additionally, the variable is returned in parameter V and the constant in K. static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K) { - SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); @@ -4046,10 +4186,10 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, // in each conditional SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1) ? &RHS1 - : NULL; + : nullptr; SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2) ? &RHS2 - : NULL; + : nullptr; SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2; SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; @@ -4073,13 +4213,15 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, const SDValue *LowerCheckOp = isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op - : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 - : NULL; + : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) + ? &Op2 + : nullptr; const SDValue *UpperCheckOp = isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op - : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 - : NULL; + : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) + ? 
&Op2 + : nullptr; if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) return false; @@ -4104,7 +4246,6 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, } SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); SDLoc dl(Op); @@ -4162,7 +4303,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - FPCCToARMCC(CC, CondCode, CondCode2); + bool InvalidOnQNaN; + FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || @@ -4181,13 +4323,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; @@ -4348,10 +4490,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - FPCCToARMCC(CC, CondCode, CondCode2); + bool InvalidOnQNaN; + FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; @@ -4853,9 +4996,10 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); - SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, - DAG.getConstant(Intrinsic::arm_get_fpscr, dl, - MVT::i32)); + SDValue Ops[] = { DAG.getEntryNode(), + DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) }; + + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, @@ -5212,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Integer comparisons. 
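
// Review note (illustrative, not part of the patch): the LowerFLT_ROUNDS_
// hunk above reads the comment's formula as (((FPSCR + (1 << 22)) >> 22) & 3).
// It works because the FPSCR RMode field (bits 23:22) encodes
// 0=nearest, 1=+inf, 2=-inf, 3=zero, while FLT_ROUNDS encodes
// 1=nearest, 2=+inf, 3=-inf, 0=zero; i.e. FLT_ROUNDS == (RMode + 1) & 3.
// A worked check of the bit-math:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t rmode = 0; rmode < 4; ++rmode) {
    uint32_t fpscr = rmode << 22;                    // only RMode bits set
    uint32_t fltRounds = ((fpscr + (1u << 22)) >> 22) & 3;
    assert(fltRounds == ((rmode + 1) & 3));          // 0->1, 1->2, 2->3, 3->0
  }
}
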
switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; + case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; - case ISD::SETLT: Swap = true; + case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGT: Opc = ARMISD::VCGT; break; - case ISD::SETLE: Swap = true; + case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGE: Opc = ARMISD::VCGE; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } @@ -5584,7 +5728,6 @@ static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { return true; } - static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); @@ -5758,7 +5901,10 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; @@ -5789,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { @@ -5829,7 +5978,10 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -5862,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6027,10 +6182,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } if (ValueCounts.size() != 1) usesOnlyOneValue = false; - if (!Value.getNode() && ValueCounts.size() > 0) + if (!Value.getNode() && !ValueCounts.empty()) Value = ValueCounts.begin()->first; - if (ValueCounts.size() == 0) + if (ValueCounts.empty()) return DAG.getUNDEF(VT); // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. @@ -6182,8 +6337,8 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, struct ShuffleSourceInfo { SDValue Vec; - unsigned MinElt; - unsigned MaxElt; + unsigned MinElt = std::numeric_limits<unsigned>::max(); + unsigned MaxElt = 0; // We may insert some combination of BITCASTs and VEXT nodes to force Vec to // be compatible with the shuffle we intend to construct. 
As a result @@ -6192,13 +6347,12 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Code should guarantee that element i in Vec starts at element "WindowBase // + i * WindowScale in ShuffleVec". - int WindowBase; - int WindowScale; + int WindowBase = 0; + int WindowScale = 1; + + ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {} bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } - ShuffleSourceInfo(SDValue Vec) - : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), - WindowScale(1) {} }; // First gather all vectors used as an immediate source for this BUILD_VECTOR @@ -6220,7 +6374,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); - auto Source = find(Sources, SourceVec); + auto Source = llvm::find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); @@ -6336,7 +6490,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, if (Entry.isUndef()) continue; - auto Src = find(Sources, Entry.getOperand(0)); + auto Src = llvm::find(Sources, Entry.getOperand(0)); int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit @@ -6633,7 +6787,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { EVT SubVT = SubV1.getValueType(); // We expect these to have been canonicalized to -1. - assert(all_of(ShuffleMask, [&](int i) { + assert(llvm::all_of(ShuffleMask, [&](int i) { return i < (int)VT.getVectorNumElements(); }) && "Unexpected shuffle index into UNDEF operand!"); @@ -6896,8 +7050,19 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { N->getValueType(0), N->getOpcode()); - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) - return SkipLoadExtensionForVMULL(LD, DAG); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) && + "Expected extending load"); + + SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1)); + unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue extLoad = + DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad); + + return newLoad; + } // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. @@ -7242,7 +7407,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. 
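
// Review note (illustrative, not part of the patch): the FSINCOS lowering
// above materializes a call that yields the {sin, cos} pair either as a
// small struct return or through a hidden sret pointer argument. In C terms
// the two call shapes are roughly as follows (names hypothetical; the real
// libcall name is selected from the target just below):

struct SinCosF { float Sin, Cos; };
SinCosF sincosf_stret(float x);              // struct-return flavor
void sincosf_sret(float x, SinCosF *result); // hidden-pointer (sret) flavor
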
- Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); + Type *RetTy = StructType::get(ArgTy, ArgTy); auto &DL = DAG.getDataLayout(); ArgListTy Args; @@ -7258,9 +7423,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ArgListEntry Entry; Entry.Node = SRet; Entry.Ty = RetTy->getPointerTo(); - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isSRet = true; + Entry.IsSExt = false; + Entry.IsZExt = false; + Entry.IsSRet = true; Args.push_back(Entry); RetTy = Type::getVoidTy(*DAG.getContext()); } @@ -7268,8 +7433,8 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; - Entry.isSExt = false; - Entry.isZExt = false; + Entry.IsSExt = false; + Entry.IsZExt = false; Args.push_back(Entry); const char *LibcallName = @@ -7427,6 +7592,9 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDValue VHi = DAG.getAnyExtOrTrunc( DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)), dl, MVT::i32); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + if (isBigEndian) + std::swap (VLo, VHi); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); @@ -7454,10 +7622,14 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N, MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1); - Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32, - SDValue(CmpSwap, 0))); - Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32, - SDValue(CmpSwap, 0))); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0, + SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? 
ARM::gsub_0 : ARM::gsub_1, + SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back(SDValue(CmpSwap, 2)); } @@ -7480,12 +7652,12 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, Entry.Node = Val; Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isZExt = true; + Entry.IsZExt = true; Args.push_back(Entry); Entry.Node = Exponent; Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isZExt = true; + Entry.IsZExt = true; Args.push_back(Entry); Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext()); @@ -7517,21 +7689,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); - case ISD::ConstantPool: - if (Subtarget->genExecuteOnly()) - llvm_unreachable("execute-only should not generate constant pools"); - return LowerConstantPool(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalAddress: - switch (Subtarget->getTargetTriple().getObjectFormat()) { - default: llvm_unreachable("unknown object format"); - case Triple::COFF: - return LowerGlobalAddressWindows(Op, DAG); - case Triple::ELF: - return LowerGlobalAddressELF(Op, DAG); - case Triple::MachO: - return LowerGlobalAddressDarwin(Op, DAG); - } + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -7607,6 +7767,37 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } } +static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned Opc = 0; + if (IntNo == Intrinsic::arm_smlald) + Opc = ARMISD::SMLALD; + else if (IntNo == Intrinsic::arm_smlaldx) + Opc = ARMISD::SMLALDX; + else if (IntNo == Intrinsic::arm_smlsld) + Opc = ARMISD::SMLSLD; + else if (IntNo == Intrinsic::arm_smlsldx) + Opc = ARMISD::SMLSLDX; + else + return; + + SDLoc dl(N); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + N->getOperand(3), + DAG.getConstant(0, dl, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + N->getOperand(3), + DAG.getConstant(1, dl, MVT::i32)); + + SDValue LongMul = DAG.getNode(Opc, dl, + DAG.getVTList(MVT::i32, MVT::i32), + N->getOperand(1), N->getOperand(2), + Lo, Hi); + Results.push_back(LongMul.getValue(0)); + Results.push_back(LongMul.getValue(1)); +} + /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
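
// Review note (illustrative, not part of the patch): ReplaceLongIntrinsic
// above splits the i64 accumulator of the arm.smlald/smlsld intrinsics into
// two i32 halves (EXTRACT_ELEMENT 0/1) and rebuilds the i64 from the node's
// two i32 results. A reference model of what SMLALD computes, as a sketch:

#include <cstdint>

// Dual signed 16x16 multiply, 64-bit accumulate. SMLSLD subtracts the
// top-half product instead; the X variants swap the halves of b.
int64_t smlald_ref(uint32_t a, uint32_t b, int64_t acc) {
  int64_t p0 = int64_t(int16_t(a)) * int16_t(b);             // bottom halves
  int64_t p1 = int64_t(int16_t(a >> 16)) * int16_t(b >> 16); // top halves
  return acc + p0 + p1;
}
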
void ARMTargetLowering::ReplaceNodeResults(SDNode *N, @@ -7648,6 +7839,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_64Results(N, Results, DAG); return; + case ISD::INTRINSIC_WO_CHAIN: + return ReplaceLongIntrinsic(N, Results, DAG); } if (Res.getNode()) Results.push_back(Res); @@ -7702,24 +7895,27 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // add r5, pc // str r5, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) - .addConstantPoolIndex(CPI) - .addMemOperand(CPMMO)); + BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO) + .add(predOps(ARMCC::AL)); // Set the low bit because of thumb mode. unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultCC( - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) - .addReg(NewVReg1, RegState::Kill) - .addImm(0x01))); + BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(0x01) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) .addReg(NewVReg2, RegState::Kill) .addImm(PCLabelId); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) - .addReg(NewVReg3, RegState::Kill) - .addFrameIndex(FI) - .addImm(36) // &jbuf[1] :: pc - .addMemOperand(FIMMOSt)); + BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) + .addReg(NewVReg3, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt) + .add(predOps(ARMCC::AL)); } else if (isThumb) { // Incoming value: jbuf // ldr.n r1, LCPI1_4 @@ -7729,51 +7925,58 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // add r2, $jbuf, #+4 ; &jbuf[1] // str r1, [r2] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) - .addConstantPoolIndex(CPI) - .addMemOperand(CPMMO)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO) + .add(predOps(ARMCC::AL)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId); // Set the low bit because of thumb mode. 
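
// Review note (illustrative, not part of the patch): on ARM, interworking
// branches take the target instruction-set state from bit 0 of the address,
// so the resume address stored into the jump buffer must have bit 0 set to
// continue in Thumb state, which is all the ORR #1 below does:

#include <cstdint>

uintptr_t asThumbTarget(uintptr_t codeAddr) {
  return codeAddr | 1;  // bit 0 set: an interworking branch stays in Thumb
}
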
unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) - .addReg(ARM::CPSR, RegState::Define) - .addImm(1)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) + .addReg(ARM::CPSR, RegState::Define) + .addImm(1) + .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg2, RegState::Kill) - .addReg(NewVReg3, RegState::Kill)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3, RegState::Kill) + .add(predOps(ARMCC::AL)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) .addFrameIndex(FI) .addImm(36); // &jbuf[1] :: pc - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) - .addReg(NewVReg4, RegState::Kill) - .addReg(NewVReg5, RegState::Kill) - .addImm(0) - .addMemOperand(FIMMOSt)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) + .addReg(NewVReg4, RegState::Kill) + .addReg(NewVReg5, RegState::Kill) + .addImm(0) + .addMemOperand(FIMMOSt) + .add(predOps(ARMCC::AL)); } else { // Incoming value: jbuf // ldr r1, LCPI1_1 // add r1, pc, r1 // str r1, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) - .addConstantPoolIndex(CPI) - .addImm(0) - .addMemOperand(CPMMO)); + BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) + .addConstantPoolIndex(CPI) + .addImm(0) + .addMemOperand(CPMMO) + .add(predOps(ARMCC::AL)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) - .addReg(NewVReg1, RegState::Kill) - .addImm(PCLabelId)); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) - .addReg(NewVReg2, RegState::Kill) - .addFrameIndex(FI) - .addImm(36) // &jbuf[1] :: pc - .addMemOperand(FIMMOSt)); + BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(PCLabelId) + .add(predOps(ARMCC::AL)); + BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) + .addReg(NewVReg2, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt) + .add(predOps(ARMCC::AL)); } } @@ -7791,7 +7994,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // Get a mapping of the call site numbers to all of the landing pads they're // associated with. 
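
// Review note (illustrative, not part of the patch): the dispatch block
// assembled below behaves like a bounds-checked indirect jump through the
// call-site table; a minimal model of the control flow being emitted:

#include <cstdlib>

using Handler = void (*)();

void sjljDispatch(unsigned callSite, const Handler *jumpTable,
                  unsigned numLPads) {
  if (callSite >= numLPads)  // the CMP + conditional branch to TrapBB
    std::abort();
  jumpTable[callSite]();     // the BR_JT through the emitted jump table
}
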
- DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; + DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad; unsigned MaxCSNum = 0; for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { @@ -7886,31 +8089,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) - .addFrameIndex(FI) - .addImm(4) - .addMemOperand(FIMMOLd)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd) + .add(predOps(ARMCC::AL)); if (NumLPads < 256) { - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) - .addReg(NewVReg1) - .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size()) + .add(predOps(ARMCC::AL)); } else { unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) - .addImm(NumLPads & 0xFFFF)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) + .addImm(NumLPads & 0xFFFF) + .add(predOps(ARMCC::AL)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) - .addReg(VReg1) - .addImm(NumLPads >> 16)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) + .addReg(VReg1) + .addImm(NumLPads >> 16) + .add(predOps(ARMCC::AL)); } - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) - .addReg(NewVReg1) - .addReg(VReg2)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) + .addReg(NewVReg1) + .addReg(VReg2) + .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) @@ -7919,16 +8127,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) - .addJumpTableIndex(MJTI)); + BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultCC( - AddDefaultPred( - BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) + BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) .addReg(NewVReg4, RegState::Kill) @@ -7936,15 +8145,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addJumpTableIndex(MJTI); } else if (Subtarget->isThumb()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) - .addFrameIndex(FI) - .addImm(1) - .addMemOperand(FIMMOLd)); + BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) + .addFrameIndex(FI) + .addImm(1) + .addMemOperand(FIMMOLd) + .add(predOps(ARMCC::AL)); if (NumLPads < 256) { - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) - .addReg(NewVReg1) - .addImm(NumLPads)); + BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) + .addReg(NewVReg1) + .addImm(NumLPads) + .add(predOps(ARMCC::AL)); } else { MachineConstantPool 
*ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -7957,12 +8168,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) - .addReg(VReg1, RegState::Define) - .addConstantPoolIndex(Idx)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) - .addReg(NewVReg1) - .addReg(VReg1)); + BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) + .addReg(VReg1, RegState::Define) + .addConstantPoolIndex(Idx) + .add(predOps(ARMCC::AL)); + BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) + .addReg(NewVReg1) + .addReg(VReg1) + .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) @@ -7971,37 +8184,42 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(ARM::CPSR); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg1) - .addImm(2)); + BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg1) + .addImm(2) + .add(predOps(ARMCC::AL)); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) - .addJumpTableIndex(MJTI)); + BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg2, RegState::Kill) - .addReg(NewVReg3)); + BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3) + .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) - .addReg(NewVReg4, RegState::Kill) - .addImm(0) - .addMemOperand(JTMMOLd)); + BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) + .addReg(NewVReg4, RegState::Kill) + .addImm(0) + .addMemOperand(JTMMOLd) + .add(predOps(ARMCC::AL)); unsigned NewVReg6 = NewVReg5; if (IsPositionIndependent) { NewVReg6 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg5, RegState::Kill) - .addReg(NewVReg3)); + BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg3) + .add(predOps(ARMCC::AL)); } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) @@ -8009,31 +8227,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addJumpTableIndex(MJTI); } else { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) - .addFrameIndex(FI) - .addImm(4) - .addMemOperand(FIMMOLd)); + BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd) + .add(predOps(ARMCC::AL)); if (NumLPads < 256) { - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) - 
.addReg(NewVReg1) - .addImm(NumLPads)); + BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) + .addReg(NewVReg1) + .addImm(NumLPads) + .add(predOps(ARMCC::AL)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) - .addImm(NumLPads & 0xFFFF)); + BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) + .addImm(NumLPads & 0xFFFF) + .add(predOps(ARMCC::AL)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) - .addReg(VReg1) - .addImm(NumLPads >> 16)); + BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) + .addReg(VReg1) + .addImm(NumLPads >> 16) + .add(predOps(ARMCC::AL)); } - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) - .addReg(NewVReg1) - .addReg(VReg2)); + BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) + .addReg(NewVReg1) + .addReg(VReg2) + .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -8046,13 +8269,15 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) - .addReg(VReg1, RegState::Define) - .addConstantPoolIndex(Idx) - .addImm(0)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) - .addReg(NewVReg1) - .addReg(VReg1, RegState::Kill)); + BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) + .addReg(VReg1, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0) + .add(predOps(ARMCC::AL)); + BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) + .addReg(NewVReg1) + .addReg(VReg1, RegState::Kill) + .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) @@ -8061,23 +8286,25 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultCC( - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) - .addReg(NewVReg1) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) + .addReg(NewVReg1) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) - .addJumpTableIndex(MJTI)); + BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) + .addJumpTableIndex(MJTI) + .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred( - BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) - .addReg(NewVReg3, RegState::Kill) - .addReg(NewVReg4) - .addImm(0) - .addMemOperand(JTMMOLd)); + BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) + .addReg(NewVReg3, RegState::Kill) + .addReg(NewVReg4) + .addImm(0) + .addMemOperand(JTMMOLd) + .add(predOps(ARMCC::AL)); if (IsPositionIndependent) { BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) @@ -8222,26 +8449,35 @@ static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); assert(LdOpc != 0 && "Should have a load 
opcode"); if (LdSize >= 8) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrOut, RegState::Define).addReg(AddrIn) - .addImm(0)); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define) + .addReg(AddrIn) + .addImm(0) + .add(predOps(ARMCC::AL)); } else if (IsThumb1) { // load + update AddrIn - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrIn).addImm(0)); - MachineInstrBuilder MIB = - BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); - MIB = AddDefaultT1CC(MIB); - MIB.addReg(AddrIn).addImm(LdSize); - AddDefaultPred(MIB); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrIn) + .addImm(0) + .add(predOps(ARMCC::AL)); + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut) + .add(t1CondCodeOp()) + .addReg(AddrIn) + .addImm(LdSize) + .add(predOps(ARMCC::AL)); } else if (IsThumb2) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrOut, RegState::Define).addReg(AddrIn) - .addImm(LdSize)); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define) + .addReg(AddrIn) + .addImm(LdSize) + .add(predOps(ARMCC::AL)); } else { // arm - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrOut, RegState::Define).addReg(AddrIn) - .addReg(0).addImm(LdSize)); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define) + .addReg(AddrIn) + .addReg(0) + .addImm(LdSize) + .add(predOps(ARMCC::AL)); } } @@ -8254,24 +8490,36 @@ static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); assert(StOpc != 0 && "Should have a store opcode"); if (StSize >= 8) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) - .addReg(AddrIn).addImm(0).addReg(Data)); + BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(AddrIn) + .addImm(0) + .addReg(Data) + .add(predOps(ARMCC::AL)); } else if (IsThumb1) { // store + update AddrIn - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) - .addReg(AddrIn).addImm(0)); - MachineInstrBuilder MIB = - BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); - MIB = AddDefaultT1CC(MIB); - MIB.addReg(AddrIn).addImm(StSize); - AddDefaultPred(MIB); + BuildMI(*BB, Pos, dl, TII->get(StOpc)) + .addReg(Data) + .addReg(AddrIn) + .addImm(0) + .add(predOps(ARMCC::AL)); + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut) + .add(t1CondCodeOp()) + .addReg(AddrIn) + .addImm(StSize) + .add(predOps(ARMCC::AL)); } else if (IsThumb2) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) - .addReg(Data).addReg(AddrIn).addImm(StSize)); + BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data) + .addReg(AddrIn) + .addImm(StSize) + .add(predOps(ARMCC::AL)); } else { // arm - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) - .addReg(Data).addReg(AddrIn).addReg(0) - .addImm(StSize)); + BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data) + .addReg(AddrIn) + .addReg(0) + .addImm(StSize) + .add(predOps(ARMCC::AL)); } } @@ -8402,16 +8650,15 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, - TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), - Vtmp).addImm(LoopSize & 0xFFFF)); + BuildMI(BB, dl, TII->get(IsThumb ? 
ARM::t2MOVi16 : ARM::MOVi16), Vtmp) + .addImm(LoopSize & 0xFFFF) + .add(predOps(ARMCC::AL)); if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, - TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), - varEnd) - .addReg(Vtmp) - .addImm(LoopSize >> 16)); + BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd) + .addReg(Vtmp) + .addImm(LoopSize >> 16) + .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -8424,11 +8671,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb) - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( - varEnd, RegState::Define).addConstantPoolIndex(Idx)); + BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)) + .addReg(varEnd, RegState::Define) + .addConstantPoolIndex(Idx) + .add(predOps(ARMCC::AL)); else - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( - varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); + BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)) + .addReg(varEnd, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0) + .add(predOps(ARMCC::AL)); } BB->addSuccessor(loopMBB); @@ -8465,16 +8717,19 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // Decrement loop variable by UnitSize. if (IsThumb1) { - MachineInstrBuilder MIB = - BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); - MIB = AddDefaultT1CC(MIB); - MIB.addReg(varPhi).addImm(UnitSize); - AddDefaultPred(MIB); + BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop) + .add(t1CondCodeOp()) + .addReg(varPhi) + .addImm(UnitSize) + .add(predOps(ARMCC::AL)); } else { MachineInstrBuilder MIB = BuildMI(*BB, BB->end(), dl, TII->get(IsThumb2 ? 
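
// Review note (illustrative, not part of the patch): the blocks being built
// here form a plain copy loop:
//   do {
//     copy one UnitSize chunk (emitPostLd + emitPostSt);
//     varLoop = varPhi - UnitSize;   // the tSUBi8 / t2SUBri / SUBri below
//   } while (varLoop != 0);          // branch on CPSR set by the SUB
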
ARM::t2SUBri : ARM::SUBri), varLoop); - AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB.addReg(varPhi) + .addImm(UnitSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); MIB->getOperand(5).setReg(ARM::CPSR); MIB->getOperand(5).setIsDef(true); } @@ -8545,11 +8800,14 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, case CodeModel::Default: case CodeModel::Kernel: BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__chkstk") - .addReg(ARM::R4, RegState::Implicit | RegState::Kill) - .addReg(ARM::R4, RegState::Implicit | RegState::Define) - .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + .add(predOps(ARMCC::AL)) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(ARM::CPSR, + RegState::Implicit | RegState::Define | RegState::Dead); break; case CodeModel::Large: case CodeModel::JITDefault: { @@ -8559,20 +8817,24 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(Reg, RegState::Kill) - .addReg(ARM::R4, RegState::Implicit | RegState::Kill) - .addReg(ARM::R4, RegState::Implicit | RegState::Define) - .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + .add(predOps(ARMCC::AL)) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(ARM::CPSR, + RegState::Implicit | RegState::Define | RegState::Dead); break; } } - AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), - ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addReg(ARM::R4, RegState::Kill) - .setMIFlags(MachineInstr::FrameSetup))); + BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); MI.eraseFromParent(); return MBB; @@ -8597,9 +8859,10 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI, MF->push_back(TrapBB); MBB->addSuccessor(TrapBB); - AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) - .addReg(MI.getOperand(0).getReg()) - .addImm(0)); + BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) + .addReg(MI.getOperand(0).getReg()) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::EQ) @@ -8617,18 +8880,18 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, bool isThumb2 = Subtarget->isThumb2(); switch (MI.getOpcode()) { default: { - MI.dump(); + MI.print(errs()); llvm_unreachable("Unexpected instr type to insert"); } // Thumb1 post-indexed loads are really just single-register LDMs. 
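
// Review note (illustrative, not part of the patch): i.e. "ldr r0, [r1], #4"
// and "ldm r1!, {r0}" perform the same operation:
//   r0 = *(uint32_t *)r1;  r1 += 4;
// which is why the pseudo below is rewritten to tLDMIA_UPD.
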
case ARM::tLDR_postidx: { BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) - .addOperand(MI.getOperand(1)) // Rn_wb - .addOperand(MI.getOperand(2)) // Rn - .addOperand(MI.getOperand(3)) // PredImm - .addOperand(MI.getOperand(4)) // PredReg - .addOperand(MI.getOperand(0)); // Rt + .add(MI.getOperand(1)) // Rn_wb + .add(MI.getOperand(2)) // Rn + .add(MI.getOperand(3)) // PredImm + .add(MI.getOperand(4)) // PredReg + .add(MI.getOperand(0)); // Rt MI.eraseFromParent(); return BB; } @@ -8659,12 +8922,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineMemOperand *MMO = *MI.memoperands_begin(); BuildMI(*BB, MI, dl, TII->get(NewOpc)) - .addOperand(MI.getOperand(0)) // Rn_wb - .addOperand(MI.getOperand(1)) // Rt - .addOperand(MI.getOperand(2)) // Rn - .addImm(Offset) // offset (skip GPR==zero_reg) - .addOperand(MI.getOperand(5)) // pred - .addOperand(MI.getOperand(6)) + .add(MI.getOperand(0)) // Rn_wb + .add(MI.getOperand(1)) // Rt + .add(MI.getOperand(2)) // Rn + .addImm(Offset) // offset (skip GPR==zero_reg) + .add(MI.getOperand(5)) // pred + .add(MI.getOperand(6)) .addMemOperand(MMO); MI.eraseFromParent(); return BB; @@ -8681,7 +8944,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); for (unsigned i = 0; i < MI.getNumOperands(); ++i) - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); MI.eraseFromParent(); return BB; } @@ -8754,18 +9017,20 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, unsigned LHS1 = MI.getOperand(1).getReg(); unsigned LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { - AddDefaultPred(BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(LHS1).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS1) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { unsigned RHS1 = MI.getOperand(3).getReg(); unsigned RHS2 = MI.getOperand(4).getReg(); - AddDefaultPred(BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(LHS1).addReg(RHS1)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS1) + .addReg(RHS1) + .add(predOps(ARMCC::AL)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS2).addReg(RHS2) .addImm(ARMCC::EQ).addReg(ARM::CPSR); @@ -8779,7 +9044,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); if (isThumb2) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); + BuildMI(BB, dl, TII->get(ARM::t2B)) + .addMBB(exitMBB) + .add(predOps(ARMCC::AL)); else BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); @@ -8842,9 +9109,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, RSBBB->addSuccessor(SinkBB); // insert a cmp at the end of BB - AddDefaultPred(BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(ABSSrcReg).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? 
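
// Review note (illustrative, not part of the patch): the BCCZi64 expansion
// above compares a 64-bit pair with two 32-bit compares, the second
// predicated on the first:
//   cmp   lhs_lo, rhs_lo     (or #0 when the RHS is known zero)
//   cmpeq lhs_hi, rhs_hi     ; executes only if the low words were equal
//   beq   destMBB            ; taken iff both halves matched
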
ARM::t2CMPri : ARM::CMPri)) + .addReg(ABSSrcReg) + .addImm(0) + .add(predOps(ARMCC::AL)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, @@ -8855,9 +9123,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Note: BCC and rsbri will be converted into predicated rsbmi // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, - TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) - .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0) - .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) + .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); // insert PHI in SinkBB, // reuse ABSDstReg to not change uses of ABS instruction @@ -8927,19 +9197,45 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); + unsigned ccOutIdx; if (NewOpc) { const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); - assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 && - "converted opcode should be the same except for cc_out"); + assert(MCID->getNumOperands() == + MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize() + && "converted opcode should be the same except for cc_out" + " (and, on Thumb1, pred)"); MI.setDesc(*MCID); // Add the optional cc_out operand MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); - } - unsigned ccOutIdx = MCID->getNumOperands() - 1; + + // On Thumb1, move all input operands to the end, then add the predicate + if (Subtarget->isThumb1Only()) { + for (unsigned c = MCID->getNumOperands() - 4; c--;) { + MI.addOperand(MI.getOperand(1)); + MI.RemoveOperand(1); + } + + // Restore the ties + for (unsigned i = MI.getNumOperands(); i--;) { + const MachineOperand& op = MI.getOperand(i); + if (op.isReg() && op.isUse()) { + int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO); + if (DefIdx != -1) + MI.tieOperands(DefIdx, i); + } + } + + MI.addOperand(MachineOperand::CreateImm(ARMCC::AL)); + MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false)); + ccOutIdx = 1; + } else + ccOutIdx = MCID->getNumOperands() - 1; + } else + ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. @@ -8970,7 +9266,9 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, if (deadCPSR) { assert(!MI.getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"); - return; + // Thumb1 instructions must have the S bit even if the CPSR is dead. + if (!Subtarget->isThumb1Only()) + return; } // If this instruction was defined with an optional CPSR def and its dag node @@ -9032,7 +9330,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDLoc dl(N); EVT VT = N->getValueType(0); CC = N->getOperand(0); - if (CC.getValueType() != MVT::i1) + if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC) return false; Invert = !AllOnes; if (AllOnes) @@ -9265,8 +9563,11 @@ AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } - // Don't generate vpaddl+vmovn; we'll match it to vpadd later. - if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. 
Also make sure + // we're using the entire input vector, otherwise there's a size/legality + // mismatch somewhere. + if (nextIndex != Vec.getValueType().getVectorNumElements() || + Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. @@ -9308,10 +9609,90 @@ static SDValue findMUL_LOHI(SDValue V) { return SDValue(); } -static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, +static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + if (Subtarget->isThumb()) { + if (!Subtarget->hasDSP()) + return SDValue(); + } else if (!Subtarget->hasV5TEOps()) + return SDValue(); + + // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and + // accumulates the product into a 64-bit value. The 16-bit values will + // be sign extended somehow or SRA'd into 32-bit values + // (addc (adde (mul 16bit, 16bit), lo), hi) + SDValue Mul = AddcNode->getOperand(0); + SDValue Lo = AddcNode->getOperand(1); + if (Mul.getOpcode() != ISD::MUL) { + Lo = AddcNode->getOperand(0); + Mul = AddcNode->getOperand(1); + if (Mul.getOpcode() != ISD::MUL) + return SDValue(); + } + + SDValue SRA = AddeNode->getOperand(0); + SDValue Hi = AddeNode->getOperand(1); + if (SRA.getOpcode() != ISD::SRA) { + SRA = AddeNode->getOperand(1); + Hi = AddeNode->getOperand(0); + if (SRA.getOpcode() != ISD::SRA) + return SDValue(); + } + if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) { + if (Const->getZExtValue() != 31) + return SDValue(); + } else + return SDValue(); + + if (SRA.getOperand(0) != Mul) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(AddcNode); + unsigned Opcode = 0; + SDValue Op0; + SDValue Op1; + + if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) { + Opcode = ARMISD::SMLALBB; + Op0 = Mul.getOperand(0); + Op1 = Mul.getOperand(1); + } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) { + Opcode = ARMISD::SMLALBT; + Op0 = Mul.getOperand(0); + Op1 = Mul.getOperand(1).getOperand(0); + } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) { + Opcode = ARMISD::SMLALTB; + Op0 = Mul.getOperand(0).getOperand(0); + Op1 = Mul.getOperand(1); + } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) { + Opcode = ARMISD::SMLALTT; + Op0 = Mul->getOperand(0).getOperand(0); + Op1 = Mul->getOperand(1).getOperand(0); + } + + if (!Op0 || !Op1) + return SDValue(); + + SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + Op0, Op1, Lo, Hi); + // Replace the ADDs' nodes uses by the MLA node's values. + SDValue HiMLALResult(SMLAL.getNode(), 1); + SDValue LoMLALResult(SMLAL.getNode(), 0); + + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + + // Return original node to notify the driver to stop replacing. + SDValue resNode(AddcNode, 0); + return resNode; +} + +static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - // Look for multiply add opportunities. 
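
// Review note (illustrative, not part of the patch): a reference model for
// the SMLALxy nodes matched above: multiply one signed 16-bit half of each
// operand and accumulate into 64 bits. B/T select the bottom/top half;
// SMLALBT shown as a sketch:

#include <cstdint>

int64_t smlalbt_ref(uint32_t a, uint32_t b, int64_t acc) {
  return acc + int64_t(int16_t(a)) * int16_t(b >> 16);
}
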
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where // each add nodes consumes a value from ISD::UMUL_LOHI and there is @@ -9326,7 +9707,17 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // \ / // ADDC <- hiAdd // - assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); + assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); + + assert(AddeNode->getNumOperands() == 3 && + AddeNode->getOperand(2).getValueType() == MVT::i32 && + "ADDE node has the wrong inputs"); + + // Check that we have a glued ADDC node. + SDNode* AddcNode = AddeNode->getOperand(2).getNode(); + if (AddcNode->getOpcode() != ARMISD::ADDC) + return SDValue(); + SDValue AddcOp0 = AddcNode->getOperand(0); SDValue AddcOp1 = AddcNode->getOperand(1); @@ -9338,29 +9729,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, AddcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); - // Check that we have a glued ADDC node. - if (AddcNode->getValueType(1) != MVT::Glue) - return SDValue(); - - // Check that the ADDC adds the low result of the S/UMUL_LOHI. + // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it + // maybe a SMLAL which multiplies two 16-bit values. if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && AddcOp0->getOpcode() != ISD::SMUL_LOHI && AddcOp1->getOpcode() != ISD::UMUL_LOHI && AddcOp1->getOpcode() != ISD::SMUL_LOHI) - return SDValue(); - - // Look for the glued ADDE. - SDNode* AddeNode = AddcNode->getGluedUser(); - if (!AddeNode) - return SDValue(); - - // Make sure it is really an ADDE. - if (AddeNode->getOpcode() != ISD::ADDE) - return SDValue(); - - assert(AddeNode->getNumOperands() == 3 && - AddeNode->getOperand(2).getValueType() == MVT::Glue && - "ADDE node has the wrong inputs"); + return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget); // Check for the triangle shape. SDValue AddeOp0 = AddeNode->getOperand(0); @@ -9435,38 +9810,25 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); // Return original node to notify the driver to stop replacing. - SDValue resNode(AddcNode, 0); - return resNode; + return SDValue(AddeNode, 0); } -static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, +static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // UMAAL is similar to UMLAL except that it adds two unsigned values. // While trying to combine for the other MLAL nodes, first search for the - // chance to use UMAAL. Check if Addc uses another addc node which can first - // be combined into a UMLAL. The other pattern is AddcNode being combined - // into an UMLAL and then using another addc is handled in ISelDAGToDAG. - - if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || - (Subtarget->isThumb() && !Subtarget->hasThumb2())) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); - - SDNode *PrevAddc = nullptr; - if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) - PrevAddc = AddcNode->getOperand(0).getNode(); - else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) - PrevAddc = AddcNode->getOperand(1).getNode(); - - // If there's no addc chains, just return a search for any MLAL. - if (PrevAddc == nullptr) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); - - // Try to convert the addc operand to an MLAL and if that fails try to - // combine AddcNode. 
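
// Review note (illustrative, not part of the patch): the ADDC/ADDE
// "triangle" described above collapses into one 64-bit multiply-accumulate;
// the unsigned flavor is shown as a sketch:

#include <cstdint>

uint64_t umlal_ref(uint32_t a, uint32_t b, uint32_t loAdd, uint32_t hiAdd) {
  uint64_t acc = (uint64_t(hiAdd) << 32) | loAdd;
  return acc + uint64_t(a) * uint64_t(b);
}
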
- SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); - if (MLAL != SDValue(PrevAddc, 0)) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + // chance to use UMAAL. Check if Addc uses a node which has already + // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde + // as the addend, and it's handled in PerformUMLALCombine. + + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget); + + // Check that we have a glued ADDC node. + SDNode* AddcNode = AddeNode->getOperand(2).getNode(); + if (AddcNode->getOpcode() != ARMISD::ADDC) + return SDValue(); // Find the converted UMAAL or quit if it doesn't exist. SDNode *UmlalNode = nullptr; @@ -9478,29 +9840,18 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, UmlalNode = AddcNode->getOperand(1).getNode(); AddHi = AddcNode->getOperand(0); } else { - return SDValue(); + return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget); } // The ADDC should be glued to an ADDE node, which uses the same UMLAL as // the ADDC as well as Zero. - auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3)); - - if (!Zero || Zero->getZExtValue() != 0) - return SDValue(); - - // Check that we have a glued ADDC node. - if (AddcNode->getValueType(1) != MVT::Glue) - return SDValue(); - - // Look for the glued ADDE. - SDNode* AddeNode = AddcNode->getGluedUser(); - if (!AddeNode) + if (!isNullConstant(UmlalNode->getOperand(3))) return SDValue(); - if ((AddeNode->getOperand(0).getNode() == Zero && + if ((isNullConstant(AddeNode->getOperand(0)) && AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && - AddeNode->getOperand(1).getNode() == Zero)) { + isNullConstant(AddeNode->getOperand(1)))) { SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), @@ -9513,19 +9864,84 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); // Return original node to notify the driver to stop replacing. - return SDValue(AddcNode, 0); + return SDValue(AddeNode, 0); } return SDValue(); } -/// PerformADDCCombine - Target-specific dag combine transform from -/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or -/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL -static SDValue PerformADDCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { +static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + return SDValue(); + + // Check that we have a pair of ADDC and ADDE as operands. + // Both addends of the ADDE must be zero. 
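
// Review note (illustrative, not part of the patch): UMAAL adds two
// independent 32-bit addends to the 64-bit product, which is why the ADDE
// being folded must otherwise contribute only zeros. Reference sketch:

#include <cstdint>

uint64_t umaal_ref(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  return uint64_t(a) * uint64_t(b) + c + d;
}
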
+ SDNode* AddcNode = N->getOperand(2).getNode(); + SDNode* AddeNode = N->getOperand(3).getNode(); + if ((AddcNode->getOpcode() == ARMISD::ADDC) && + (AddeNode->getOpcode() == ARMISD::ADDE) && + isNullConstant(AddeNode->getOperand(0)) && + isNullConstant(AddeNode->getOperand(1)) && + (AddeNode->getOperand(2).getNode() == AddcNode)) + return DAG.getNode(ARMISD::UMAAL, SDLoc(N), + DAG.getVTList(MVT::i32, MVT::i32), + {N->getOperand(0), N->getOperand(1), + AddcNode->getOperand(0), AddcNode->getOperand(1)}); + else + return SDValue(); +} + +static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (Subtarget->isThumb1Only()) { + SDValue RHS = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) { + int32_t imm = C->getSExtValue(); + if (imm < 0 && imm > INT_MIN) { + SDLoc DL(N); + RHS = DAG.getConstant(-imm, DL, MVT::i32); + unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC + : ARMISD::ADDC; + return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS); + } + } + } + return SDValue(); +} - if (Subtarget->isThumb1Only()) return SDValue(); +static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (Subtarget->isThumb1Only()) { + SDValue RHS = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) { + int64_t imm = C->getSExtValue(); + if (imm < 0) { + SDLoc DL(N); + + // The with-carry-in form matches bitwise not instead of the negation. + // Effectively, the inverse interpretation of the carry flag already + // accounts for part of the negation. + RHS = DAG.getConstant(~imm, DL, MVT::i32); + + unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE + : ARMISD::ADDE; + return DAG.getNode(Opcode, DL, N->getVTList(), + N->getOperand(0), RHS, N->getOperand(2)); + } + } + } + return SDValue(); +} + +/// PerformADDECombine - Target-specific dag combine transform from +/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or +/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL +static SDValue PerformADDECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Only ARM and Thumb2 support UMLAL/SMLAL. + if (Subtarget->isThumb1Only()) + return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); // Only perform the checks after legalize when the pattern is available. if (DCI.isBeforeLegalize()) return SDValue(); @@ -9722,7 +10138,6 @@ static SDValue PerformMULCombine(SDNode *N, static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); SDLoc dl(N); @@ -9761,6 +10176,67 @@ static SDValue PerformANDCombine(SDNode *N, return SDValue(); } +// Try combining OR nodes to SMULWB, SMULWT. +static SDValue PerformORCombineToSMULWBT(SDNode *OR, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasV6Ops() || + (Subtarget->isThumb() && + (!Subtarget->hasThumb2() || !Subtarget->hasDSP()))) + return SDValue(); + + SDValue SRL = OR->getOperand(0); + SDValue SHL = OR->getOperand(1); + + if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { + SRL = OR->getOperand(1); + SHL = OR->getOperand(0); + } + if (!isSRL16(SRL) || !isSHL16(SHL)) + return SDValue(); + + // The first operands to the shifts need to be the two results from the + // same smul_lohi node. 
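
// Review note (illustrative, not part of the patch): SMULWB/SMULWT keep
// bits [47:16] of a signed 32x16 product, which is exactly what the
// srl/shl/or over the two smul_lohi halves reassembles. Bottom-half form
// shown; SMULWT uses the top half of the 16-bit operand:

#include <cstdint>

int32_t smulwb_ref(int32_t a, int32_t b) {
  return int32_t((int64_t(a) * int16_t(b)) >> 16);
}
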
+  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+      SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+    return SDValue();
+
+  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+    return SDValue();
+
+  // Now we have:
+  //   (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
+  // For SMUL[B|T], smul_lohi will take a 32-bit and a 16-bit argument.
+  // For SMULWB the 16-bit value will be sign extended in some way.
+  // For SMULWT only the SRA is required.
+  // Check both sides of SMUL_LOHI.
+  SDValue OpS16 = SMULLOHI->getOperand(0);
+  SDValue OpS32 = SMULLOHI->getOperand(1);
+
+  SelectionDAG &DAG = DCI.DAG;
+  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
+    OpS16 = OpS32;
+    OpS32 = SMULLOHI->getOperand(0);
+  }
+
+  SDLoc dl(OR);
+  unsigned Opcode = 0;
+  if (isS16(OpS16, DAG))
+    Opcode = ARMISD::SMULWB;
+  else if (isSRA16(OpS16)) {
+    Opcode = ARMISD::SMULWT;
+    OpS16 = OpS16->getOperand(0);
+  }
+  else
+    return SDValue();
+
+  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
+  return SDValue(OR, 0);
+}
+
 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
 static SDValue PerformORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
@@ -9798,6 +10274,8 @@ static SDValue PerformORCombine(SDNode *N,
     // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
     if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
       return Result;
+    if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+      return Result;
   }

   // The code below optimizes (or (and X, Y), Z).
@@ -9906,7 +10384,7 @@ static SDValue PerformORCombine(SDNode *N,
         (Mask == ~Mask2)) {
       // The pack halfword instruction works better for masks that fit it,
       // so use that when it's available.
-      if (Subtarget->hasT2ExtractPack() &&
+      if (Subtarget->hasDSP() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a
@@ -9922,7 +10400,7 @@ static SDValue PerformORCombine(SDNode *N,
        (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
-      if (Subtarget->hasT2ExtractPack() &&
+      if (Subtarget->hasDSP() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b
@@ -10485,11 +10963,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
     // If the increment is a constant, it must match the memory ref size.
     SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
-    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
-      uint64_t IncVal = CInc->getZExtValue();
-      if (IncVal != NumBytes)
-        continue;
-    } else if (NumBytes >= 3 * 16) {
+    ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+    if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
       // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
       // separate instructions that make it harder to use a non-constant update.
       continue;
@@ -11306,34 +11781,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }

-static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
-                             APInt &KnownOne) {
-  if (Op.getOpcode() == ARMISD::BFI) {
-    // Conservatively, we can recurse down the first operand
-    // and just mask out all affected bits.
-    computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
-
-    // The operand to BFI is already a mask suitable for removing the bits it
-    // sets.
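A sketch (a plain C++ stand-in for LLVM's known-bits tracking; names assumed) of the conservative rule the BFI comment above describes, which this patch re-homes into computeKnownBitsForTargetNode below: knowledge about the base operand survives only in the bit positions the BFI mask leaves untouched, so both known-bit sets are intersected with the mask.

    #include <cstdint>

    struct KnownSketch { uint32_t Zero = 0, One = 0; };

    // Knowledge survives only where the mask is set, i.e. in the positions
    // the insertion leaves alone; inserted bits become unknown.
    static KnownSketch knownForBFI(KnownSketch Base, uint32_t Mask) {
      Base.Zero &= Mask;
      Base.One &= Mask;
      return Base;
    }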
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); - KnownZero &= Mask; - KnownOne &= Mask; - return; - } - if (Op.getOpcode() == ARMISD::CMOV) { - APInt KZ2(KnownZero.getBitWidth(), 0); - APInt KO2(KnownOne.getBitWidth(), 0); - computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne); - computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2); - - KnownZero &= KZ2; - KnownOne &= KO2; - return; - } - return DAG.computeKnownBits(Op, KnownZero, KnownOne); -} - SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11394,9 +11841,9 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D // Lastly, can we determine that the bits defined by OrCI // are zero in Y? - APInt KnownZero, KnownOne; - computeKnownBits(DAG, Y, KnownZero, KnownOne); - if ((OrCI & KnownZero) != OrCI) + KnownBits Known; + DAG.computeKnownBits(Y, Known); + if ((OrCI & Known.Zero) != OrCI) return SDValue(); // OK, we can do the combine. @@ -11534,16 +11981,16 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { } if (Res.getNode()) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(SDValue(N,0), Known); // Capture demanded bits information that would be otherwise lost. - if (KnownZero == 0xfffffffe) + if (Known.Zero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i1)); - else if (KnownZero == 0xffffff00) + else if (Known.Zero == 0xffffff00) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i8)); - else if (KnownZero == 0xffff0000) + else if (Known.Zero == 0xffff0000) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i16)); } @@ -11555,13 +12002,17 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); + case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); + case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); + case ARMISD::ADDC: + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); + case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -11593,6 +12044,56 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); + case ARMISD::SMULWB: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); + if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)) + return SDValue(); + break; + } + case ARMISD::SMULWT: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16); + if 
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)) + return SDValue(); + break; + } + case ARMISD::SMLALBB: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } + case ARMISD::SMLALBT: { + unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits(); + APInt LowMask = APInt::getLowBitsSet(LowWidth, 16); + unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits(); + APInt HighMask = APInt::getHighBitsSet(HighWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI))) + return SDValue(); + break; + } + case ARMISD::SMLALTB: { + unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits(); + APInt HighMask = APInt::getHighBitsSet(HighWidth, 16); + unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits(); + APInt LowMask = APInt::getLowBitsSet(LowWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI))) + return SDValue(); + break; + } + case ARMISD::SMLALTT: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { @@ -11688,12 +12189,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, } } - // Lowering to i32/i16 if the size permits. - if (Size >= 4) - return MVT::i32; - else if (Size >= 2) - return MVT::i16; - // Let the target-independent logic figure it out. return MVT::Other; } @@ -12178,12 +12673,12 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownOne.getBitWidth(); - KnownZero = KnownOne = APInt(BitWidth, 0); + unsigned BitWidth = Known.getBitWidth(); + Known.resetAll(); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: @@ -12193,17 +12688,18 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // These nodes' second result is a boolean if (Op.getResNo() == 0) break; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. 
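A sketch (plain C++ stand-in, names assumed) of the intersection rule this CMOV case applies: the node may produce either operand, so only bits known in both operands remain known afterwards.

    #include <cstdint>

    struct KnownSketch { uint32_t Zero, One; };

    // Only bits known in both possible results stay known after the select.
    static KnownSketch knownForCMOV(KnownSketch L, KnownSketch R) {
      return { L.Zero & R.Zero, L.One & R.One };
    }

    // e.g. {Zero: 0xFFFF0000, One: 0x1} and {Zero: 0xFF000000, One: 0x3}
    // intersect to {Zero: 0xFF000000, One: 0x1}.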
- DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - if (KnownZero == 0 && KnownOne == 0) return; + DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1); + if (Known.isUnknown()) + return; - APInt KnownZeroRHS, KnownOneRHS; - DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); - KnownZero &= KnownZeroRHS; - KnownOne &= KnownOneRHS; + KnownBits KnownRHS; + DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1); + Known.Zero &= KnownRHS.Zero; + Known.One &= KnownRHS.One; return; } case ISD::INTRINSIC_W_CHAIN: { @@ -12215,11 +12711,24 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::arm_ldrex: { EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } } + case ARMISD::BFI: { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1); + + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); + const APInt &Mask = CI->getAPIntValue(); + Known.Zero &= Mask; + Known.One &= Mask; + return; + } } } @@ -12588,8 +13097,8 @@ static TargetLowering::ArgListTy getDivRemArgList( Type *ArgTy = ArgVT.getTypeForEVT(*Context); Entry.Node = N->getOperand(i); Entry.Ty = ArgTy; - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; + Entry.IsSExt = isSigned; + Entry.IsZExt = !isSigned; Args.push_back(Entry); } if (Subtarget->isTargetWindows() && Args.size() >= 2) @@ -12615,7 +13124,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. - if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() + : Subtarget->hasDivideInARMMode(); + if (hasDivide && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); @@ -12639,7 +13150,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); - Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); + Type *RetTy = StructType::get(Ty, Ty); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain); @@ -12861,7 +13372,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::arm_stlexd: - case Intrinsic::arm_strexd: { + case Intrinsic::arm_strexd: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(2); @@ -12871,9 +13382,9 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = false; Info.writeMem = true; return true; - } + case Intrinsic::arm_ldaexd: - case Intrinsic::arm_ldrexd: { + case Intrinsic::arm_ldrexd: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); @@ -12883,7 +13394,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = true; Info.writeMem = false; return true; - } + default: break; } @@ -12921,7 +13432,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { - Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); + Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(7), Builder.getInt32(10), Builder.getInt32(5)}; @@ -12932,7 +13443,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, llvm_unreachable("makeDMB on a target so old that it has no barriers"); } } else { - Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); + Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); // Only a full system barrier exists in the M-class architectures. Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; Constant *CDomain = Builder.getInt32(Domain); @@ -12941,9 +13452,9 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, } // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html -Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { +Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: @@ -12952,7 +13463,7 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, case AtomicOrdering::Acquire: return nullptr; // Nothing to do case AtomicOrdering::SequentiallyConsistent: - if (!IsStore) + if (!Inst->hasAtomicStore()) return nullptr; // Nothing to do /*FALLTHROUGH*/ case AtomicOrdering::Release: @@ -12966,9 +13477,9 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, llvm_unreachable("Unknown fence ordering in emitLeadingFence"); } -Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { +Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: @@ -13089,7 +13600,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, if (ValTy->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + Function *Ldrex = Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); @@ -13106,7 +13617,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldrex, Addr), @@ -13118,7 +13629,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( if (!Subtarget->hasV7Ops()) return; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); + Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); } Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, @@ -13154,6 +13665,39 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Addr}); } +/// A helper function for determining the number of interleaved accesses we +/// will generate when lowering accesses of the given type. +unsigned +ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const { + return (DL.getTypeSizeInBits(VecTy) + 127) / 128; +} + +bool ARMTargetLowering::isLegalInterleavedAccessType( + VectorType *VecTy, const DataLayout &DL) const { + + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); + + // Ensure the vector doesn't have f16 elements. Even though we could do an + // i16 vldN, we can't hold the f16 vectors and will end up converting via + // f32. 
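A worked example (standalone C++, values assumed) of the two interleaved-access helpers being defined here: the access count is a ceiling division of the vector width by 128, and a width passes the final check below when it is exactly 64 bits or a multiple of 128 bits.

    // Mirrors getNumInterleavedAccesses: a ceiling division by 128.
    static unsigned numAccesses(unsigned Bits) { return (Bits + 127) / 128; }

    // Mirrors the width check at the end of isLegalInterleavedAccessType.
    static bool legalWidth(unsigned Bits) {
      return Bits == 64 || Bits % 128 == 0;
    }

    // A 512-bit v16i32 passes legalWidth and splits into numAccesses(512) == 4
    // vldN calls; a 96-bit v3i32 fails legalWidth and takes the generic path.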
+ if (VecTy->getElementType()->isHalfTy()) + return false; + + // Ensure the number of vector elements is greater than 1. + if (VecTy->getNumElements() < 2) + return false; + + // Ensure the element type is legal. + if (ElSize != 8 && ElSize != 16 && ElSize != 32) + return false; + + // Ensure the total vector size is 64 or a multiple of 128. Types larger than + // 128 will be split into multiple interleaved accesses. + return VecSize == 64 || VecSize % 128 == 0; +} + /// \brief Lower an interleaved load into a vldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -13178,64 +13722,99 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip if we do not have NEON and skip illegal vector types and vector types - // with i64/f64 elements (vldN doesn't support i64/f64 elements). - if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types. We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL)) return false; + unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL); + // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) VecTy = VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); + IRBuilder<> Builder(LI); + + // The base address of the load. + Value *BaseAddr = LI->getPointerOperand(); + + if (NumLoads > 1) { + // If we're going to generate more than one load, reset the sub-vector type + // to something legal. + VecTy = VectorType::get(VecTy->getVectorElementType(), + VecTy->getVectorNumElements() / NumLoads); + + // We will compute the pointer operand of each load from the original base + // address using GEPs. Cast the base address to a pointer to the scalar + // element type. + BaseAddr = Builder.CreateBitCast( + BaseAddr, VecTy->getVectorElementType()->getPointerTo( + LI->getPointerAddressSpace())); + } + + assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!"); + + Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); + Type *Tys[] = {VecTy, Int8Ptr}; static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; + Function *VldnFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); - IRBuilder<> Builder(LI); - SmallVector<Value *, 2> Ops; + // Holds sub-vectors extracted from the load intrinsic return values. The + // sub-vectors are associated with the shufflevector instructions they will + // replace. 
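To make the load splitting below concrete, a small sketch (standalone C++, sizes assumed) of the address stride it uses: each vldN consumes Factor sub-vectors' worth of elements, so load k begins k * NumElts * Factor elements past the original base address.

    // Element offset of load k when a wide interleaved load is split.
    static unsigned loadOffsetElems(unsigned LoadCount, unsigned NumElts,
                                    unsigned Factor) {
      return LoadCount * NumElts * Factor;
    }

    // E.g. splitting a factor-2 load of v8i32 into two vld2 of v4i32: the
    // second vld2 starts at element loadOffsetElems(1, 4, 2) == 8.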
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs; - Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); - Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); - Ops.push_back(Builder.getInt32(LI->getAlignment())); + for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { - Type *Tys[] = { VecTy, Int8Ptr }; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); - CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); + // If we're generating more than one load, compute the base address of + // subsequent loads as an offset from the previous. + if (LoadCount > 0) + BaseAddr = Builder.CreateConstGEP1_32( + BaseAddr, VecTy->getVectorNumElements() * Factor); - // Replace uses of each shufflevector with the corresponding vector loaded - // by ldN. - for (unsigned i = 0; i < Shuffles.size(); i++) { - ShuffleVectorInst *SV = Shuffles[i]; - unsigned Index = Indices[i]; + SmallVector<Value *, 2> Ops; + Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); + Ops.push_back(Builder.getInt32(LI->getAlignment())); - Value *SubVec = Builder.CreateExtractValue(VldN, Index); + CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); - // Convert the integer vector to pointer vector if the element is pointer. - if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + // Replace uses of each shufflevector with the corresponding vector loaded + // by ldN. + for (unsigned i = 0; i < Shuffles.size(); i++) { + ShuffleVectorInst *SV = Shuffles[i]; + unsigned Index = Indices[i]; - SV->replaceAllUsesWith(SubVec); - } + Value *SubVec = Builder.CreateExtractValue(VldN, Index); - return true; -} + // Convert the integer vector to pointer vector if the element is pointer. + if (EltTy->isPointerTy()) + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. <Start, Start + 1, ..., Start + NumElts - 1> -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector<Constant *, 16> Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); + SubVecs[SV].push_back(SubVec); + } + } - return ConstantVector::get(Mask); + // Replace uses of the shufflevector instructions with the sub-vectors + // returned by the load intrinsic. If a shufflevector instruction is + // associated with more than one sub-vector, those sub-vectors will be + // concatenated into a single wide vector. + for (ShuffleVectorInst *SVI : Shuffles) { + auto &SubVec = SubVecs[SVI]; + auto *WideVec = + SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0]; + SVI->replaceAllUsesWith(WideVec); + } + + return true; } /// \brief Lower an interleaved store into a vstN intrinsic. @@ -13279,15 +13858,15 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip if we do not have NEON and skip illegal vector types and vector types - // with i64/f64 elements (vstN doesn't support i64/f64 elements). 
-  if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
-      EltIs64Bits)
+  // Skip if we do not have NEON and skip illegal vector types. We can
+  // "legalize" wide vector types into multiple interleaved accesses as long as
+  // the vector types are divisible by 128.
+  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
     return false;

+  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
   Value *Op0 = SVI->getOperand(0);
   Value *Op1 = SVI->getOperand(1);
   IRBuilder<> Builder(SI);
@@ -13306,44 +13885,75 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
     SubVecTy = VectorType::get(IntTy, LaneLen);
   }

+  // The base address of the store.
+  Value *BaseAddr = SI->getPointerOperand();
+
+  if (NumStores > 1) {
+    // If we're going to generate more than one store, reset the lane length
+    // and sub-vector type to something legal.
+    LaneLen /= NumStores;
+    SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+    // We will compute the pointer operand of each store from the original base
+    // address using GEPs. Cast the base address to a pointer to the scalar
+    // element type.
+    BaseAddr = Builder.CreateBitCast(
+        BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+                      SI->getPointerAddressSpace()));
+  }
+
+  assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
+
+  auto Mask = SVI->getShuffleMask();
+
+  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+  Type *Tys[] = {Int8Ptr, SubVecTy};
   static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
                                              Intrinsic::arm_neon_vst3,
                                              Intrinsic::arm_neon_vst4};

-  SmallVector<Value *, 6> Ops;

-  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
-  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {

-  Type *Tys[] = { Int8Ptr, SubVecTy };
-  Function *VstNFunc = Intrinsic::getDeclaration(
-      SI->getModule(), StoreInts[Factor - 2], Tys);
+    // If we're generating more than one store, we compute the base address of
+    // subsequent stores as an offset from the previous.
+    if (StoreCount > 0)
+      BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);

-  // Split the shufflevector operands into sub vectors for the new vstN call.
-  auto Mask = SVI->getShuffleMask();
-  for (unsigned i = 0; i < Factor; i++) {
-    if (Mask[i] >= 0) {
-      Ops.push_back(Builder.CreateShuffleVector(
-          Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
-    } else {
-      unsigned StartMask = 0;
-      for (unsigned j = 1; j < LaneLen; j++) {
-        if (Mask[j*Factor + i] >= 0) {
-          StartMask = Mask[j*Factor + i] - j;
-          break;
+    SmallVector<Value *, 6> Ops;
+    Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+
+    Function *VstNFunc =
+        Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+    // Split the shufflevector operands into sub-vectors for the new vstN call.
+    for (unsigned i = 0; i < Factor; i++) {
+      unsigned IdxI = StoreCount * LaneLen * Factor + i;
+      if (Mask[IdxI] >= 0) {
+        Ops.push_back(Builder.CreateShuffleVector(
+            Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+      } else {
+        unsigned StartMask = 0;
+        for (unsigned j = 1; j < LaneLen; j++) {
+          unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+          if (Mask[IdxJ * Factor + IdxI] >= 0) {
+            StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+            break;
+          }
         }
+        // Note: If all elements in a chunk are undefs, StartMask=0!
+ // Note: Filling undef gaps with random elements is ok, since + // those elements were being written anyway (with undefs). + // In the case of all undefs we're defaulting to using elems from 0 + // Note: StartMask cannot be negative, it's checked in + // isReInterleaveMask + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } - // Note: If all elements in a chunk are undefs, StartMask=0! - // Note: Filling undef gaps with random elements is ok, since - // those elements were being written anyway (with undefs). - // In the case of all undefs we're defaulting to using elems from 0 - // Note: StartMask cannot be negative, it's checked in isReInterleaveMask - Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); } - } - Ops.push_back(Builder.getInt32(SI->getAlignment())); - Builder.CreateCall(VstNFunc, Ops); + Ops.push_back(Builder.getInt32(SI->getAlignment())); + Builder.CreateCall(VstNFunc, Ops); + } return true; } @@ -13484,3 +14094,8 @@ void ARMTargetLowering::insertCopiesSplitCSR( .addReg(NewVR); } } + +void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const { + MF.getFrameInfo().computeMaxCallFrameSize(MF); + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 84c6eb8..f05b142 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -175,9 +175,19 @@ class InstrItineraryData; VMULLs, // ...signed VMULLu, // ...unsigned + SMULWB, // Signed multiply word by half word, bottom + SMULWT, // Signed multiply word by half word, top UMLAL, // 64bit Unsigned Accumulate Multiply SMLAL, // 64bit Signed Accumulate Multiply UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply + SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16 + SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 + SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 + SMLALTT, // 64-bit signed accumulate multiply top, top 16 + SMLALD, // Signed multiply accumulate long dual + SMLALDX, // Signed multiply accumulate long dual exchange + SMLSLD, // Signed multiply subtract long dual + SMLSLDX, // Signed multiply subtract long dual exchange // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other @@ -344,8 +354,8 @@ class InstrItineraryData; SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override; @@ -473,10 +483,10 @@ class InstrItineraryData; void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override; - Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, - bool IsStore, bool IsLoad) const override; - Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, - bool IsStore, bool IsLoad) const override; + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; unsigned getMaxSupportedInterleaveFactor() const override { return 4; } @@ -500,9 +510,19 @@ class 
InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { + // Do not merge to larger than i32. + return (MemVT.getSizeInBits() <= 32); + } + bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + bool supportSwiftError() const override { return true; } @@ -514,6 +534,19 @@ class InstrItineraryData; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; + /// Returns true if \p VecTy is a legal interleaved access type. This + /// function checks the vector element type and the overall width of the + /// vector. + bool isLegalInterleavedAccessType(VectorType *VecTy, + const DataLayout &DL) const; + + /// Returns the number of interleaved accesses that will be generated when + /// lowering accesses of the given type. + unsigned getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const; + + void finalizeLowering(MachineFunction &MF) const override; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -569,6 +602,8 @@ class InstrItineraryData; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; @@ -690,7 +725,7 @@ class InstrItineraryData; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, @@ -698,7 +733,7 @@ class InstrItineraryData; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - const SDLoc &dl) const; + const SDLoc &dl, bool InvalidOnQNaN) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 488439f..1bbe7f0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -184,7 +184,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { // ARM special operands for disassembly only. 
// -def SetEndAsmOperand : ImmAsmOperand { +def SetEndAsmOperand : ImmAsmOperand<0,1> { let Name = "SetEndImm"; let ParserMethod = "parseSetEndImm"; } @@ -221,25 +221,25 @@ def banked_reg : Operand<i32> { // 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> -def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } +def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; } def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; let ParserMatchClass = shr_imm8_asm_operand; } -def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } +def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; } def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; let ParserMatchClass = shr_imm16_asm_operand; } -def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } +def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; } def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; let ParserMatchClass = shr_imm32_asm_operand; } -def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } +def shr_imm64_asm_operand : ImmAsmOperand<1,64> { let Name = "ShrImm64"; } def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; @@ -261,10 +261,19 @@ def const_pool_asm_imm : Operand<i32> { // Note: When EmitPriority == 1, the alias will be used for printing class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>; +class ARMInstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, + Requires<[IsARM,UseNegativeImmediates]>; class tInstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>; +class tInstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, + Requires<[IsThumb,UseNegativeImmediates]>; class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>; +class t2InstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, + Requires<[IsThumb2,UseNegativeImmediates]>; class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>; class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0> @@ -948,7 +957,7 @@ class ADivA1I<bits<3> opcod, dag oops, dag iops, } // PKH instructions -def PKHLSLAsmOperand : ImmAsmOperand { +def PKHLSLAsmOperand : ImmAsmOperand<0,31> { let Name = "PKHLSLImm"; let ParserMethod = "parsePKHLSLImm"; } @@ -1013,9 +1022,6 @@ class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> { class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP]; } -class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack]; -} 
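The parameterized ImmAsmOperand<Low, High> classes threaded through the operand definitions above expand, later in this patch, to a PredicateMethod named "isImmediate<Low,High>". A rough C++ shape of such a range predicate (an assumption about the generated parser code, not a copy of it) is a simple inclusive check:

    #include <cstdint>

    // Inclusive range check corresponding to ImmAsmOperand<Low, High>.
    template <int64_t Low, int64_t High>
    static bool isImmediateSketch(int64_t Value) {
      return Value >= Low && Value <= High;
    }

    // shr_imm8's operand class, for example, corresponds to
    // isImmediateSketch<1, 8>: 8 is accepted, 0 and 9 are rejected.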
//===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 27b6432..a0e2ac4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -32,8 +32,8 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoop(MCInst &NopInst) const { if (hasNOP()) { NopInst.setOpcode(ARM::HINT); NopInst.addOperand(MCOperand::createImm(0)); @@ -129,8 +129,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); MIB.addMemOperand(MMO); - MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg); - MIB.addReg(Reg, RegState::Kill).addImm(0); - MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) + .add(predOps(ARMCC::AL)); } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h index 4b1b709..c87fb97 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -25,8 +25,8 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index c473939..7206083 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -16,7 +16,8 @@ // // Type profiles. 
-def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def SDT_ARMStructByVal : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, @@ -51,6 +52,8 @@ def SDT_ARMAnd : SDTypeProfile<1, 2, SDTCisVT<2, i32>]>; def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; @@ -90,6 +93,18 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; +def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisSameAs<0, 5>]>; + +def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; +def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; +def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; +def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -181,6 +196,13 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>; +def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>; +def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>; +def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; +def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; +def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // @@ -243,13 +265,10 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16","full half-float">; -def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; +def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, + AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; -def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, - AssemblerPredicate<"FeatureT2XtPk", - "pack/extract">; def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<"FeatureDSP", "dsp">; def HasDB : Predicate<"Subtarget->hasDataBarrier()">, @@ -298,9 +317,16 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; +def UseNegativeImmediates : + Predicate<"false">, + AssemblerPredicate<"!FeatureNoNegativeImmediates", + "NegativeImmediates">; + // FIXME: Eventually this will be just "hasV6T2Ops". 
-def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; -def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; +let RecomputePerFunction = 1 in { + def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; + def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; +} def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -327,8 +353,10 @@ def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||" def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" "Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; -def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +let RecomputePerFunction = 1 in { + def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; + def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +} def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; @@ -423,7 +451,16 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // // Immediate operands with a shared generic asm render method. -class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } +class ImmAsmOperand<int Low, int High> : AsmOperandClass { + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; + let DiagnosticType = "ImmRange" # Low # "_" # High; +} + +class ImmAsmOperandMinusOne<int Low, int High> : AsmOperandClass { + let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; + let DiagnosticType = "ImmRange" # Low # "_" # High; +} // Operands that are part of a memory addressing mode. class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; } @@ -645,35 +682,45 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_1 predicate - Immediate in the range [0,1]. -def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; } +def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; } def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } /// imm0_3 predicate - Immediate in the range [0,3]. -def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } +def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; } def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } /// imm0_7 predicate - Immediate in the range [0,7]. -def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } +def Imm0_7AsmOperand: ImmAsmOperand<0,7> { + let Name = "Imm0_7"; +} def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; }]> { let ParserMatchClass = Imm0_7AsmOperand; } +/// imm8_255 predicate - Immediate in the range [8,255]. +def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; } +def imm8_255 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 8 && Imm < 256; +}]> { + let ParserMatchClass = Imm8_255AsmOperand; +} + /// imm8 predicate - Immediate is exactly 8. -def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; } +def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> { let ParserMatchClass = Imm8AsmOperand; } /// imm16 predicate - Immediate is exactly 16. -def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; } +def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; } def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> { let ParserMatchClass = Imm16AsmOperand; } /// imm32 predicate - Immediate is exactly 32. 
-def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; } +def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; } def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { let ParserMatchClass = Imm32AsmOperand; } @@ -681,25 +728,25 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>; /// imm1_7 predicate - Immediate in the range [1,7]. -def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } +def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; } def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { let ParserMatchClass = Imm1_7AsmOperand; } /// imm1_15 predicate - Immediate in the range [1,15]. -def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; } +def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; } def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> { let ParserMatchClass = Imm1_15AsmOperand; } /// imm1_31 predicate - Immediate in the range [1,31]. -def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; } +def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; } def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { let ParserMatchClass = Imm1_31AsmOperand; } /// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: ImmAsmOperand { +def Imm0_15AsmOperand: ImmAsmOperand<0,15> { let Name = "Imm0_15"; let DiagnosticType = "ImmRange0_15"; } @@ -710,7 +757,7 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. -def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; } +def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; } def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]> { @@ -718,15 +765,15 @@ def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32]. -def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; } +def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; } def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 32; + return Imm >= 0 && Imm < 33; }]> { let ParserMatchClass = Imm0_32AsmOperand; } /// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. -def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; } +def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; } def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 64; }]> { @@ -734,7 +781,7 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_239 predicate - Immediate in the range [0,239]. -def Imm0_239AsmOperand : ImmAsmOperand { +def Imm0_239AsmOperand : ImmAsmOperand<0,239> { let Name = "Imm0_239"; let DiagnosticType = "ImmRange0_239"; } @@ -743,13 +790,13 @@ def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> { } /// imm0_255 predicate - Immediate in the range [0,255]. -def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } +def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { let ParserMatchClass = Imm0_255AsmOperand; } -/// imm0_65535 - An immediate is in the range [0.65535]. -def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; } +/// imm0_65535 - An immediate is in the range [0,65535]. 
+def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; } def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 65536; }]> { @@ -767,19 +814,23 @@ def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{ // FIXME: This really needs a Thumb version separate from the ARM version. // While the range is the same, and can thus use the same match class, // the encoding is different so it should have a different encoder method. -def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; } +def Imm0_65535ExprAsmOperand: AsmOperandClass { + let Name = "Imm0_65535Expr"; + let RenderMethod = "addImmOperands"; +} + def imm0_65535_expr : Operand<i32> { let EncoderMethod = "getHiLo16ImmOpValue"; let ParserMatchClass = Imm0_65535ExprAsmOperand; } -def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; } +def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; } def imm256_65535_expr : Operand<i32> { let ParserMatchClass = Imm256_65535ExprAsmOperand; } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; } def imm24b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 0xffffff; }]> { @@ -808,7 +859,9 @@ def imm1_32_XFORM: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), MVT::i32); }]>; -def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; } +def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> { + let Name = "Imm1_32"; +} def imm1_32 : Operand<i32>, PatLeaf<(imm), [{ uint64_t Imm = N->getZExtValue(); return Imm > 0 && Imm <= 32; @@ -822,8 +875,10 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), MVT::i32); }]>; -def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; } -def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], +def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } +def imm1_16 : Operand<i32>, ImmLeaf<i32, [{ + return Imm > 0 && Imm <= 16; + }], imm1_16_XFORM> { let PrintMethod = "printImmPlusOneOperand"; let ParserMatchClass = Imm1_16AsmOperand; @@ -1914,8 +1969,8 @@ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : -PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, - [(ARMcallseq_start timm:$amt)]>; +PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2, pred:$p), NoItinerary, + [(ARMcallseq_start timm:$amt, timm:$amt2)]>; } def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, @@ -1936,7 +1991,9 @@ def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", - "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3425,8 +3482,12 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src), + (SXTB16 GPR:$Src, 0)>; def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def : ARMV6Pat<(int_arm_sxtab16 
GPR:$LHS, GPR:$RHS), + (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>; // Zero extenders @@ -3446,6 +3507,8 @@ def UXTB16 : AI_ext_rrot<0b01101100, // (UXTB16r_rot GPR:$Src, 3)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16 GPR:$Src, 1)>; +def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src), + (UXTB16 GPR:$Src, 0)>; def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -3460,6 +3523,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS), + (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>; def SBFX : I<(outs GPRnopc:$Rd), @@ -3586,71 +3651,85 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, let Unpredictable{11-8} = 0b1111; } -// Saturating add/subtract +// Wrappers around the AAI class +class AAIRevOpr<bits<8> op27_20, bits<8> op11_4, string opc, + list<dag> pattern = []> + : AAI<op27_20, op11_4, opc, + pattern, + (ins GPRnopc:$Rm, GPRnopc:$Rn), + "\t$Rd, $Rm, $Rn">; + +class AAIIntrinsic<bits<8> op27_20, bits<8> op11_4, string opc, + Intrinsic intrinsic> + : AAI<op27_20, op11_4, opc, + [(set GPRnopc:$Rd, (intrinsic GPRnopc:$Rn, GPRnopc:$Rm))]>; +// Saturating add/subtract +let hasSideEffects = 1 in { +def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>; +def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>; +def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>; +def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>; + +def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd", + [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm, + GPRnopc:$Rm), + GPRnopc:$Rn))]>; +def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, + (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>; +def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>; let DecoderMethod = "DecodeQADDInstruction" in -def QADD : AAI<0b00010000, 0b00000101, "qadd", - [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; - -def QSUB : AAI<0b00010010, 0b00000101, "qsub", - [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; -def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; -def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; - -def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">; -def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">; -def QASX : AAI<0b01100010, 0b11110011, "qasx">; -def QSAX : AAI<0b01100010, 0b11110101, "qsax">; -def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">; -def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">; -def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">; -def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">; -def UQASX : AAI<0b01100110, 0b11110011, "uqasx">; -def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">; -def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">; -def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">; + def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd", + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; +} + +def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; +def UQADD8 : 
AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; +def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; +def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>; +def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>; +def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; +def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; +def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; // Signed/Unsigned add/subtract -def SASX : AAI<0b01100001, 0b11110011, "sasx">; -def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">; -def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">; -def SSAX : AAI<0b01100001, 0b11110101, "ssax">; -def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">; -def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">; -def UASX : AAI<0b01100101, 0b11110011, "uasx">; -def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">; -def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">; -def USAX : AAI<0b01100101, 0b11110101, "usax">; -def USUB16 : AAI<0b01100101, 0b11110111, "usub16">; -def USUB8 : AAI<0b01100101, 0b11111111, "usub8">; +def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>; +def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>; +def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>; +def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>; +def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>; +def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>; +def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>; +def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>; +def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>; +def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>; +def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>; +def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>; // Signed/Unsigned halving add/subtract -def SHASX : AAI<0b01100011, 0b11110011, "shasx">; -def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">; -def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">; -def SHSAX : AAI<0b01100011, 0b11110101, "shsax">; -def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">; -def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">; -def UHASX : AAI<0b01100111, 0b11110011, "uhasx">; -def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">; -def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">; -def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; -def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; -def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; +def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>; +def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>; +def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>; +def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>; +def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>; +def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>; +def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>; +def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>; +def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>; +def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>; +def UHSUB16 : AAIIntrinsic<0b01100111, 
0b11110111, "uhsub16", int_arm_uhsub16>; +def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>; // Unsigned Sum of Absolute Differences [and Accumulate]. def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", - "\t$Rd, $Rn, $Rm", []>, + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -3664,7 +3743,8 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), } def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), MulFrm /* for convenience */, NoItinerary, "usada8", - "\t$Rd, $Rn, $Rm, $Ra", []>, + "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ bits<4> Rd; bits<4> Rn; @@ -3679,7 +3759,6 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), } // Signed/Unsigned saturate - def SSAT : AI<(outs GPRnopc:$Rd), (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, @@ -3748,6 +3827,10 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), + (SSAT16 imm1_16:$pos, GPRnopc:$a)>; +def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), + (USAT16 imm0_15:$pos, GPRnopc:$a)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -3850,6 +3933,7 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, let Inst{11-0} = imm; } +let AddedComplexity = 1 in def : ARMPat<(and GPR:$src, mod_imm_not:$imm), (BICri GPR:$src, mod_imm_not:$imm)>; @@ -3899,7 +3983,8 @@ def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b0000; let Unpredictable{15-12} = 0b1111; } @@ -3910,14 +3995,16 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6, UseMulOps]>; + Requires<[IsARM, NoV6, UseMulOps]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]> { + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3928,12 +4015,14 @@ def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd), pred:$p, cc_out:$s), 4, IIC_iMAC32, [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))], (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2, UseMulOps]> { + 
Requires<[IsARM, HasV6T2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3949,26 +4038,38 @@ let hasSideEffects = 0 in { let isCommutable = 1 in { def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, - "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + "smull", "\t$RdLo, $RdHi, $Rn, $Rm", + [(set GPR:$RdLo, GPR:$RdHi, + (smullohi GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, - "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + "umull", "\t$RdLo, $RdHi, $Rn, $Rm", + [(set GPR:$RdLo, GPR:$RdHi, + (umullohi GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>; let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - 4, IIC_iMUL64, [], + 4, IIC_iMUL64, + [(set GPR:$RdLo, GPR:$RdHi, + (smullohi GPR:$Rn, GPR:$Rm))], (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - 4, IIC_iMUL64, [], + 4, IIC_iMUL64, + [(set GPR:$RdLo, GPR:$RdHi, + (umullohi GPR:$Rn, GPR:$Rm))], (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; } } @@ -3976,17 +4077,20 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rm; @@ -4004,13 +4108,15 @@ def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), 4, IIC_iMAC64, [], (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, 
pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; } } // hasSideEffects @@ -4019,13 +4125,15 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } @@ -4033,57 +4141,67 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; multiclass AI_smul<string opc> { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - []>, - Requires<[IsARM, HasV5TE]>; + [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), 
"\t$Rd, $Rn, $Rm", - []>, - Requires<[IsARM, HasV5TE]>; + [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; } @@ -4095,7 +4213,8 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4103,7 +4222,8 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4111,7 +4231,8 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4119,19 +4240,24 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - []>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + [(set GPRnopc:$Rd, + (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - []>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + [(set GPRnopc:$Rd, + (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; } } @@ -4139,30 +4265,34 @@ defm SMUL : AI_smul<"smul">; defm SMLA : AI_smla<"smla">; // Halfword multiply accumulate long: SMLAL<x><y>. 
-def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; - -def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; - -def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; - -def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; +class SMLAL<bits<2> opc1, string asm> + : AMulxyI64<0b0001010, opc1, + (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; + +def SMLALBB : SMLAL<0b00, "smlalbb">; +def SMLALBT : SMLAL<0b10, "smlalbt">; +def SMLALTB : SMLAL<0b01, "smlaltb">; +def SMLALTT : SMLAL<0b11, "smlaltt">; + +def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALBB $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALBT $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALTB $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALTT $Rn, $Rm, $RLo, $RHi)>; // Helper class for AI_smld. 
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops, InstrItinClass itin, string opc, string asm> - : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { + : AI<oops, iops, MulFrm, itin, opc, asm, []>, + Requires<[IsARM, HasV6]> { bits<4> Rn; bits<4> Rm; let Inst{27-23} = 0b01110; @@ -4203,48 +4333,85 @@ multiclass AI_smld<bit sub, string opc> { def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">; + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">; + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, - !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">; + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, + !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, - !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">; - + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, + !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; } defm SMLA : AI_smld<0, "smla">; defm SMLS : AI_smld<1, "smls">; +def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; + multiclass AI_sdml<bit sub, string opc> { def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; +def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm), + (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>; 
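With the int_arm_smuad/int_arm_smlad (and smusd/smlsd) patterns above in place, the SIMD32 dual-multiply intrinsics select these instructions directly. A hedged sketch assuming <arm_acle.h> with __ARM_FEATURE_SIMD32, where int16x2_t packs two halfwords into an int32_t; the helper names are illustrative:

#include <arm_acle.h>
#include <stdint.h>

/* lo(a)*lo(b) + hi(a)*hi(b); expected to select SMUAD. */
int32_t dual_mul(int16x2_t a, int16x2_t b) { return __smuad(a, b); }

/* Same dual multiply plus an accumulator; expected to select SMLAD. */
int32_t dual_mac(int16x2_t a, int16x2_t b, int32_t acc) {
  return __smlad(a, b, acc);
}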
+def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>; + //===----------------------------------------------------------------------===// // Division Instructions (ARMv7-A with virtualization extension) // def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>; + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>; + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -4831,14 +4998,15 @@ let AddedComplexity = 8 in { def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; } -// SWP/SWPB are deprecated in V6/V7. +// SWP/SWPB are deprecated in V6/V7 and optional in v7VE. +// FIXME Use InstAlias to generate LDREX/STREX pairs instead. let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; } //===----------------------------------------------------------------------===// @@ -4850,7 +5018,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]>, - Requires<[PreV8]> { + Requires<[IsARM,PreV8]> { bits<4> opc1; bits<4> CRn; bits<4> CRd; @@ -4872,7 +5040,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]>, - Requires<[PreV8]> { + Requires<[IsARM,PreV8]> { let Inst{31-28} = 0b1111; bits<4> opc1; bits<4> CRn; @@ -5048,13 +5216,13 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> { defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; +defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; -defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; +defm 
STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register. @@ -5132,7 +5300,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; @@ -5140,7 +5308,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; @@ -5183,7 +5351,7 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops, list<dag> pattern = []> : ABXI<0b1100, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>, - Requires<[PreV8]> { + Requires<[IsARM,PreV8]> { let Inst{31-28} = 0b1111; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -5525,20 +5693,52 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>; + (SMULBB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; + (SMULBT GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>; + (SMULTB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + +def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b), + (SMULTT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b), + (SMULWT GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (SMLAWB GPR:$a, 
GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; // Pre-v7 uses MCR for synchronization barriers. def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, @@ -5717,33 +5917,49 @@ def : MnemonicAlias<"usubaddx", "usax">; // "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like // for isel. -def : ARMInstAlias<"mov${s}${p} $Rd, $imm", +def : ARMInstSubst<"mov${s}${p} $Rd, $imm", (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", +def : ARMInstSubst<"mvn${s}${p} $Rd, $imm", (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Same for AND <--> BIC -def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", +def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm", (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", +def : ARMInstSubst<"bic${s}${p} $Rdn, $imm", (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", +def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm", (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"and${s}${p} $Rdn, $imm", +def : ARMInstSubst<"and${s}${p} $Rdn, $imm", (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, mod_imm_neg" -> sub -def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", +def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm", (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"add${s}${p} $Rd, $imm", +def : ARMInstSubst<"add${s}${p} $Rd, $imm", (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Likewise, "sub Rd, mod_imm_neg" -> add +def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm", + (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sub${s}${p} $Rd, $imm", + (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; + + +def : ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm", + (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"adc${s}${p} $Rdn, $imm", + (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm", + (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm", + (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; + // Same for CMP <--> CMN via mod_imm_neg -def : ARMInstAlias<"cmp${p} $Rd, $imm", +def : ARMInstSubst<"cmp${p} $Rd, $imm", (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; -def : ARMInstAlias<"cmn${p} $Rd, $imm", +def : ARMInstSubst<"cmn${p} $Rd, $imm", (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, @@ -5837,21 +6053,28 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), // significantly more naive than the standard expansion: we conservatively // assume seq_cst, strong cmpxchg and omit clrex on failure. 
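As the comment above says, the CMP_SWAP_* pseudos conservatively model a strong, sequentially consistent compare-and-swap. In C11 terms the operation being expanded is roughly the following (a sketch of the source-level form, not of the LDREX/STREX loop the pseudo later becomes):

#include <stdatomic.h>
#include <stdbool.h>

/* On ARM this lowers through a CMP_SWAP pseudo and is expanded late
   into an exclusive-load/store retry loop. */
bool try_update(_Atomic int *p, int expected, int desired) {
  return atomic_compare_exchange_strong(p, &expected, desired);
}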
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status", +let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", mayLoad = 1, mayStore = 1 in { -def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index b5fa8e9..858136a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; +def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>]>; +def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; +def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; +def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; + + def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); unsigned EltBits = 0; @@ -666,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins AddrMode:$Rn), IIC_VLD1, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -674,7 +682,7 @@ class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins AddrMode:$Rn), IIC_VLD1x2, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -695,7 +703,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -703,7 +711,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -712,7 +720,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -720,7 +728,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -739,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -748,7 +756,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -756,7 +764,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -772,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; -def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; -def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; +def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; // ...with 4 registers class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -789,7 +797,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -797,7 +805,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -813,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; -def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; -def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; +def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -829,22 +837,22 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, 
"8", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; -def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; -def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; +def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; +def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; +def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; // ...with address register writeback: multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, @@ -867,45 +875,45 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, } defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; -def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; -def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; -def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; -def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; -def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; // ...with double-spaced registers def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, 
"32", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn), IIC_VLD3, - "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -915,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; -def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; -def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; +def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; +def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; +def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -925,7 +933,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -934,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; // ...with double-spaced registers: def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; @@ -946,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; // ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>; -def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>; -def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>; +def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn), IIC_VLD4, - "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, + Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -974,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; -def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; -def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; +def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; +def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; +def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -984,7 +993,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -993,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; -def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; -def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; // ...with double-spaced registers: def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; @@ -1005,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; -def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; -def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; -def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; // ...alternate versions to be allocated odd register numbers: -def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; -def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; -def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; +def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; -def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; -def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; -def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1068,11 +1077,12 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, "$src = $Vd", [(set DPR:$Vd, (vector_insert (Ty DPR:$src), (i32 (LoadOp addrmode6oneL32:$Rn)), - 
                             imm:$lane))]> {
+                            imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}

-class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
+                                                    Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
@@ -1109,7 +1119,7 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
-           "$src = $Vd, $Rn.addr = $wb", []> {
+           "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

@@ -1126,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{4} = Rn{4};
}

-def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
-           "$src1 = $Vd, $src2 = $dst2", []> {
+           "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

@@ -1151,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
@@ -1163,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
-def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1187,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
@@ -1198,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1207,7 +1217,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
-           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

@@ -1222,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
@@ -1234,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1246,7 +1256,7 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
-           []> {
+           []>, Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}

@@ -1260,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
@@ -1271,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1281,7 +1291,8 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
-           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
+           Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

@@ -1298,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

-def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
@@ -1311,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
}

-def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1339,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
}

-def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
@@ -1351,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
}

-def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

@@ -1363,7 +1374,8 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
-               (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+               (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
+   Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1426,7 +1438,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
                      (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                      (ins AddrMode:$Rn), IIC_VLD1dupu,
                      "vld1", Dt, "$Vd, $Rn!",
-                     "$Rn.addr = $wb", []> {
+                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1483,7 +1495,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                      (outs VdTy:$Vd, GPR:$wb),
                      (ins AddrMode:$Rn), IIC_VLD2dupu,
                      "vld2", Dt, "$Vd, $Rn!",
-                     "$Rn.addr = $wb", []> {
+                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
@@ -1492,7 +1504,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                      (outs VdTy:$Vd, GPR:$wb),
                      (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                      "vld2", Dt, "$Vd, $Rn, $Rm",
-                     "$Rn.addr = $wb", []> {
+                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
@@ -1516,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
-         "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+         "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
+         Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
@@ -1526,9 +1539,9 @@ def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

-def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
@@ -1540,7 +1553,7 @@ class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
-         "$Rn.addr = $wb", []> {
+         "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}
@@ -1553,9 +1566,9 @@ def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

-def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
@@ -1572,9 +1585,9 @@ def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

-def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
@@ -1587,7 +1600,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt>
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
-         "$Rn.addr = $wb", []> {
+         "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}
@@ -1600,9 +1613,9 @@ def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

-def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
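The change being applied throughout this file attaches a Sched<[...]> list to each NEON load class and pseudo, tying it to an abstract SchedWrite that a CPU's SchedMachineModel can later resolve to concrete latencies and functional units. A minimal sketch of the mechanism, assuming an illustrative ProcResource name MyUnit (in a real target the WriteRes would live inside a specific CPU's scheduling model):

  // Hypothetical functional unit and a per-CPU resolution of WriteVLD1.
  def MyUnit : ProcResource<2>;
  def : WriteRes<WriteVLD1, [MyUnit]> { let Latency = 5; }
  // An instruction or pseudo simply names the abstract write it produces:
  def MyVLD1Pseudo : VLDQLNPseudo<IIC_VLD1ln>, Sched<[WriteVLD1]>;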
@@ -1649,14 +1662,14 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
-          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
+          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
-          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
+          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1677,7 +1690,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
-                    "$Rn.addr = $wb", []> {
+                    "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1686,7 +1699,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
-                       "$Rn.addr = $wb", []> {
+                       "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
@@ -1695,7 +1708,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
-                    "$Rn.addr = $wb", []> {
+                    "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1704,7 +1717,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
-                       "$Rn.addr = $wb", []> {
+                       "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
@@ -1724,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
-         IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
+         IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1733,7 +1746,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
-                    "$Rn.addr = $wb", []> {
+                    "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1742,7 +1755,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
-                       "$Rn.addr = $wb", []> {
+                       "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
@@ -1758,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

-def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
-def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
-         []> {
+         []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1776,7 +1789,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
-                    "$Rn.addr = $wb", []> {
+                    "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1785,7 +1798,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
-                       "$Rn.addr = $wb", []> {
+                       "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
@@ -1801,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

-def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
-def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1816,22 +1829,22 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
}

def VST2d8   : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
-                    addrmode6align64or128>;
+                    addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
-                    addrmode6align64or128>;
+                    addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
-                    addrmode6align64or128>;
+                    addrmode6align64or128>, Sched<[WriteVST2]>;

def VST2q8   : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
-                    addrmode6align64or128or256>;
+                    addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
-                    addrmode6align64or128or256>;
+                    addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
-                    addrmode6align64or128or256>;
+                    addrmode6align64or128or256>, Sched<[WriteVST4]>;

-def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
-def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -1839,7 +1852,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
-                    "$Rn.addr = $wb", []> {
+                    "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1847,7 +1860,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
-                       "$Rn.addr = $wb", []> {
+                       "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
@@ -1856,7 +1869,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
-                    "$Rn.addr = $wb", []> {
+                    "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1865,7 +1878,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
-                       "$Rn.addr = $wb", []> {
+                       "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
@@ -1882,12 +1895,12 @@ defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

-def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;

// ...with double-spaced registers
def VST2b8   : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
@@ -1907,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
-         "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+         "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
@@ -1917,9 +1930,9 @@ def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

-def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
-def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1927,7 +1940,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
-         "$Rn.addr = $wb", []> {
+         "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -1936,9 +1949,9 @@ def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

-def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
@@ -1948,25 +1961,25 @@ def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

-def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
-def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
-def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

-def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
-         "", []> {
+         "", []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
@@ -1976,9 +1989,9 @@ def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

-def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
-def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1986,7 +1999,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
-         "$Rn.addr = $wb", []> {
+         "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -1995,9 +2008,9 @@ def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

-def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
@@ -2007,18 +2020,18 @@ def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

-def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
-def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
-def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

-def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
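All of the writeback stores above follow one idiom: a multiclass emits a `_fixed` variant (post-increment by the access size, encoded by hard-coding Rm = 0b1101) and a `_register` variant (post-increment by rGPR:$Rm), with the Sched<[...]> annotation repeated on both. A compressed sketch of that idiom under an illustrative name, MyVST1DWB, mirroring the real VST1DWB definitions above:

  multiclass MyVST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
    def _fixed : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
                       (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                       "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
                 Sched<[WriteVST1]> {
      let Rm = 0b1101; // fixed post-increment form hard-codes Rm
    }
    def _register : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
                          (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                          IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm",
                          "$Rn.addr = $wb", []>,
                    Sched<[WriteVST1]>;
  }
  // A single defm then yields both _fixed and _register instructions:
  // defm MyVST1d8wb : MyVST1DWB<{0,0,0,?}, "8", addrmode6align64>;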
@@ -2052,12 +2065,13 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
-           [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
+           [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
+    Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
-  : VSTQLNPseudo<IIC_VST1ln> {
+  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}
@@ -2096,11 +2110,12 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
-                                   AdrMode:$Rn, am6offset:$Rm))]> {
+                                   AdrMode:$Rn, am6offset:$Rm))]>,
+    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
-  : VSTQLNWBPseudo<IIC_VST1lnu> {
+  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}
@@ -2131,7 +2146,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
-           "", []> {
+           "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
@@ -2147,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
@@ -2161,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{4} = Rn{4};
}

-def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
-def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2185,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
@@ -2196,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

-def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
-           "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+           "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
+    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

@@ -2219,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
@@ -2255,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
@@ -2266,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

-def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2275,7 +2291,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
-           "", []> {
+           "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

@@ -2292,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

-def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
@@ -2305,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
}

-def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
-def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2331,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
}

-def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
@@ -2343,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
}

-def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
@@ -5550,8 +5566,7 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
-                       IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
-                       int_arm_neon_vabs>;
+                       IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def  VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                   "vabs", "f32",
                   v2f32, v2f32, fabs>;
@@ -5567,29 +5582,6 @@ def  VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                   v8f16, v8f16, fabs>,
              Requires<[HasNEON, HasFullFP16]>;

-def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
-               (v2i32 (bitconvert (v8i8 (add DPR:$src,
-                                             (NEONvshrs DPR:$src, (i32 7))))))),
-          (VABSv8i8 DPR:$src)>;
-def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
-               (v2i32 (bitconvert (v4i16 (add DPR:$src,
-                                              (NEONvshrs DPR:$src, (i32 15))))))),
-          (VABSv4i16 DPR:$src)>;
-def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
-               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
-          (VABSv2i32 DPR:$src)>;
-def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
-               (v4i32 (bitconvert (v16i8 (add QPR:$src,
-                                              (NEONvshrs QPR:$src, (i32 7))))))),
-          (VABSv16i8 QPR:$src)>;
-def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
-               (v4i32 (bitconvert (v8i16 (add QPR:$src,
-                                              (NEONvshrs QPR:$src, (i32 15))))))),
-          (VABSv8i16 QPR:$src)>;
-def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
-               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
-          (VABSv4i32 QPR:$src)>;
-
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
@@ -6443,7 +6435,8 @@ def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
-       [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+       [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
@@ -6498,6 +6491,49 @@ def  VTBX4Pseudo
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

+def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
+          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+                                            v8i8:$Vn1, dsub_1),
+                       v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+                                    v8i8:$Vm)),
+          (v8i8 (VTBX2 v8i8:$orig,
+                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+                                            v8i8:$Vn1, dsub_1),
+                       v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
+                                    v8i8:$Vn2, v8i8:$Vm)),
+          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+                                                 v8i8:$Vn1, dsub_1,
+                                                 v8i8:$Vn2, dsub_2,
+                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
+                             v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+                                    v8i8:$Vn2, v8i8:$Vm)),
+          (v8i8 (VTBX3Pseudo v8i8:$orig,
+                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+                                                 v8i8:$Vn1, dsub_1,
+                                                 v8i8:$Vn2, dsub_2,
+                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
+                             v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
+                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+                                                 v8i8:$Vn1, dsub_1,
+                                                 v8i8:$Vn2, dsub_2,
+                                                 v8i8:$Vn3, dsub_3),
+                             v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+          (v8i8 (VTBX4Pseudo v8i8:$orig,
+                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+                                                 v8i8:$Vn1, dsub_1,
+                                                 v8i8:$Vn2, dsub_2,
+                                                 v8i8:$Vn3, dsub_3),
+                             v8i8:$Vm))>;
+
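These patterns select the multi-register table lookups directly from TableGen: the individual v8i8 inputs are first glued into a DPair or QQPR tuple with REG_SEQUENCE (padding the three-register case with IMPLICIT_DEF), and the tuple is fed to the real VTBL/VTBX instruction or pseudo. Written out for a hypothetical two-table node, my_tbl2 (an illustrative stand-in for the NEONvtbl2 node used above):

  // Sketch only: selecting a two-table lookup through a DPair tuple.
  def my_tbl2 : SDNode<"ARMISD::VTBL2",
                       SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>]>>;
  def : Pat<(v8i8 (my_tbl2 v8i8:$t0, v8i8:$t1, v8i8:$idx)),
            (VTBL2 (REG_SEQUENCE DPair, v8i8:$t0, dsub_0,
                                        v8i8:$t1, dsub_1),
                   v8i8:$idx)>;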
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
@@ -7139,6 +7175,17 @@ let Predicates = [IsBE] in {
                             (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

+def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+
//===----------------------------------------------------------------------===//
// Assembler aliases
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index a681f64..891a8f4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -19,7 +19,7 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{
  unsigned Imm = N->getZExtValue();
  return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
}]>;
-def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; }
def imm_sr : Operand<i32>, PatLeaf<(imm), [{
  uint64_t Imm = N->getZExtValue();
  return Imm > 0 && Imm <= 32;
@@ -28,22 +28,31 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
  let ParserMatchClass = ThumbSRImmAsmOperand;
}

-def imm_comp_XFORM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
-                                   MVT::i32);
-}]>;
-
def imm0_7_neg : PatLeaf<(i32 imm), [{
  return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;

+def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; }
+def mod_imm1_7_neg : Operand<i32>, PatLeaf<(imm), [{
+    unsigned Value = -(unsigned)N->getZExtValue();
+    return 0 < Value && Value < 8;
+  }], imm_neg_XFORM> {
+  let ParserMatchClass = ThumbModImmNeg1_7AsmOperand;
+}
+
+def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; }
+def mod_imm8_255_neg : Operand<i32>, PatLeaf<(imm), [{
+    unsigned Value = -(unsigned)N->getZExtValue();
+    return 7 < Value && Value < 256;
+  }], imm_neg_XFORM> {
+  let ParserMatchClass = ThumbModImmNeg8_255AsmOperand;
+}
+
+
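Both new operand classes pair an assembler match class with a PatLeaf whose predicate accepts a constant if its negation fits the encoding; imm_neg_XFORM then rewrites the constant to that positive value. The practical effect, exploited by the tInstSubst substitutions that follow, is that a mnemonic written with a negative immediate is re-rendered as the opposite operation. A sketch of the rewrites implied by the operand ranges above (assembly examples are illustrative, not taken from this patch):

  // "add sp, #-8"      matches t_imm0_508s4_neg  and is emitted as  sub sp, sp, #8
  // "subs r0, r1, #-5" matches mod_imm1_7_neg    and is emitted as  adds r0, r1, #5
  // "adds r0, #-200"   matches mod_imm8_255_neg  and is emitted as  subs r0, #200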
def imm0_255_comp : PatLeaf<(i32 imm), [{
  return ~((uint32_t)N->getZExtValue()) < 256;
}]>;

-def imm8_255 : ImmLeaf<i32, [{
-  return Imm >= 8 && Imm < 256;
-}]>;
def imm8_255_neg : PatLeaf<(i32 imm), [{
  unsigned Val = -N->getZExtValue();
  return Val >= 8 && Val < 256;
@@ -275,8 +284,8 @@ def tADJCALLSTACKUP :
  Requires<[IsThumb, IsThumb1Only]>;

def tADJCALLSTACKDOWN :
-  PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
-             [(ARMcallseq_start imm:$amt)]>,
+  PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2), NoItinerary,
+             [(ARMcallseq_start imm:$amt, imm:$amt2)]>,
  Requires<[IsThumb, IsThumb1Only]>;
}
@@ -407,9 +416,9 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
  let DecoderMethod = "DecodeThumbAddSPImm";
}

-def : tInstAlias<"add${p} sp, $imm",
+def : tInstSubst<"add${p} sp, $imm",
                 (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
-def : tInstAlias<"add${p} sp, sp, $imm",
+def : tInstSubst<"add${p} sp, sp, $imm",
                 (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;

// Can optionally specify SP as a three operand instruction.
@@ -910,7 +919,7 @@ let isAdd = 1 in {
  def tADC :                    // A8.6.2
    T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
                  "adc", "\t$Rdn, $Rm",
-                 [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+                 []>, Sched<[WriteALU]>;

  // Add immediate
  def tADDi3 :                    // A8.6.4 T1
@@ -938,6 +947,43 @@ let isAdd = 1 in {
                   "add", "\t$Rd, $Rn, $Rm",
                   [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;

+  /// Similar to the above except these set the 's' bit so the
+  /// instruction modifies the CPSR register.
+  ///
+  /// These opcodes will be converted to the real non-S opcodes by
+  /// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+  let hasPostISelHook = 1, Defs = [CPSR] in {
+    let isCommutable = 1, Uses = [CPSR] in
+    def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                            2, IIC_iALUr,
+                            [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm,
+                                                            CPSR))]>,
+                Requires<[IsThumb1Only]>,
+                Sched<[WriteALU]>;
+
+    def tADDSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+                              2, IIC_iALUi,
+                              [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rm,
+                                                             imm0_7:$imm3))]>,
+                  Requires<[IsThumb1Only]>,
+                  Sched<[WriteALU]>;
+
+    def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+                              2, IIC_iALUi,
+                              [(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn,
+                                                              imm8_255:$imm8))]>,
+                  Requires<[IsThumb1Only]>,
+                  Sched<[WriteALU]>;
+
+    let isCommutable = 1 in
+    def tADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+                              2, IIC_iALUr,
+                              [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rn,
+                                                             tGPR:$Rm))]>,
+                  Requires<[IsThumb1Only]>,
+                  Sched<[WriteALU]>;
+  }
+
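Each pseudo in this block models the flag-setting form as a two-result node: the pattern (set tGPR:$Rd, CPSR, (ARMaddc ...)) binds both the arithmetic result and the carry-out, so Thumb-1 ADDS/ADCS chains survive selection with the carry explicit, and the post-ISel hook then lowers the pseudo onto the real 16-bit encoding. The shape restated in isolation, with MyADDSrr as an illustrative name (tADDSrr above is the real def):

  let hasPostISelHook = 1, Defs = [CPSR] in
  def MyADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
                             2, IIC_iALUr,
                             [(set tGPR:$Rd, CPSR,                // two results
                                   (ARMaddc tGPR:$Rn, tGPR:$Rm))]>,
                 Requires<[IsThumb1Only]>, Sched<[WriteALU]>;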
  let hasSideEffects = 0 in
  def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
                       "add", "\t$Rdn, $Rm", []>,
@@ -951,6 +997,12 @@ let isAdd = 1 in {
  }
}

+def : tInstSubst<"sub${s}${p} $rd, $rn, $imm",
+                 (tADDi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+def : tInstSubst<"sub${s}${p} $rdn, $imm",
+                 (tADDi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
+
// AND register
let isCommutable = 1 in
def tAND :                    // A8.6.12
@@ -1197,7 +1249,7 @@ def tSBC :                    // A8.6.151
  T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
                "sbc", "\t$Rdn, $Rm",
-               [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+               []>,
  Sched<[WriteALU]>;

// Subtract immediate
@@ -1218,6 +1270,14 @@ def tSUBi8 :                    // A8.6.210 T2
                 [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
                 Sched<[WriteALU]>;

+def : tInstSubst<"add${s}${p} $rd, $rn, $imm",
+                 (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+
+
+def : tInstSubst<"add${s}${p} $rdn, $imm",
+                 (tSUBi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
+
// Subtract register
def tSUBrr :                    // A8.6.212
  T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1226,6 +1286,42 @@ def tSUBrr :                    // A8.6.212
                [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
                Sched<[WriteALU]>;

+/// Similar to the above except these set the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+  let Uses = [CPSR] in
+  def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                          2, IIC_iALUr,
+                          [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm,
+                                                          CPSR))]>,
+              Requires<[IsThumb1Only]>,
+              Sched<[WriteALU]>;
+
+  def tSUBSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+                            2, IIC_iALUi,
+                            [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rm,
+                                                           imm0_7:$imm3))]>,
+                Requires<[IsThumb1Only]>,
+                Sched<[WriteALU]>;
+
+  def tSUBSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+                            2, IIC_iALUi,
+                            [(set tGPR:$Rdn, CPSR, (ARMsubc tGPR:$Rn,
+                                                            imm8_255:$imm8))]>,
+                Requires<[IsThumb1Only]>,
+                Sched<[WriteALU]>;
+
+  def tSUBSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+                            2, IIC_iALUr,
+                            [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rn,
+                                                           tGPR:$Rm))]>,
+                Requires<[IsThumb1Only]>,
+                Sched<[WriteALU]>;
+}
+
// Sign-extend byte
def tSXTB :                     // A8.6.222
  T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1317,14 +1413,15 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),

// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
// and make use of the same compressed jump table format as Thumb-2.
-let Size = 2 in {
+let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
+    isIndirectBranch = 1 in {
def tTBB_JT : tPseudoInst<(outs),
-       (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
-       Sched<[WriteBr]>;
+       (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+       IIC_Br, []>, Sched<[WriteBr]>;
def tTBH_JT : tPseudoInst<(outs),
-       (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
-       Sched<[WriteBr]>;
+       (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+       IIC_Br, []>, Sched<[WriteBr]>;
}

//===----------------------------------------------------------------------===//
@@ -1386,22 +1483,6 @@ def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
            (tCMPr   tGPR:$Rn, tGPR:$Rm)>;

-// Add with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
-            (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
-            (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
-            (tADDrr tGPR:$lhs, tGPR:$rhs)>;
-
-// Subtract with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
-            (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
-            (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
-def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
-            (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
-
// Bswap 16 with load/store
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
            (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
@@ -1477,7 +1558,7 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr),
                        (tLDRHr t_addrmode_rr:$addr)>;

// post-inc LDR -> LDM r0!, {r1}. The way operands are laid out in LDMs is
// different to how ISel expects them for a post-inc load, so use a pseudo
// and expand it just after ISel.
-let usesCustomInserter = 1,
+let usesCustomInserter = 1, mayLoad = 1,
     Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
  def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
                                (ins rGPR:$Rn, pred:$p),
@@ -1547,7 +1628,7 @@ def : T1Pat<(i32 thumb_immshifted:$src),
             (thumb_immshifted_shamt imm:$src))>;

def : T1Pat<(i32 imm0_255_comp:$src),
-           (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+           (tMVN (tMOVi8 (imm_not_XFORM imm:$src)))>;

def : T1Pat<(i32 imm256_510:$src),
            (tADDi8 (tMOVi8 255),
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 603d664..42eac12 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -76,7 +76,11 @@ def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : AsmOperandClass {
+  let Name = "T2SOImm";
+  let RenderMethod = "addImmOperands";
+
+}
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }]> {
@@ -110,15 +114,14 @@ def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{

// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
-def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
-  int64_t Value = -(int)N->getZExtValue();
-  return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+def t2_so_imm_neg : Operand<i32>, ImmLeaf<i32, [{
+    return Imm && ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
}], t2_so_imm_neg_XFORM> {
  let ParserMatchClass = t2_so_imm_neg_asmoperand;
}
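The PatLeaf-to-ImmLeaf switch here is not cosmetic: an ImmLeaf predicate receives the constant directly as Imm, so the check can be written in plain integer arithmetic (note the unsigned negation, which avoids the signed-overflow hazard of the old -(int) cast) instead of digging the value out of the node. The general shape, with my_t2_neg_imm as an illustrative copy of the t2_so_imm_neg predicate above:

  def my_t2_neg_imm : ImmLeaf<i32, [{
    // Accept a nonzero value whose negation is a Thumb-2 modified immediate.
    return Imm && ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
  }], t2_so_imm_neg_XFORM>;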
-/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095_asmoperand: ImmAsmOperand<0,4095> { let Name = "Imm0_4095"; }
def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
  return Imm >= 0 && Imm < 4096;
}]> {
@@ -139,7 +142,7 @@ def imm1_255_neg : PatLeaf<(i32 imm), [{

def imm0_255_not : PatLeaf<(i32 imm), [{
  return (uint32_t)(~N->getZExtValue()) < 255;
-}], imm_comp_XFORM>;
+}], imm_not_XFORM>;

def lo5AllOne : PatLeaf<(i32 imm), [{
  // Returns true if all low 5-bits are 1.
@@ -538,7 +541,8 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin,
class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
                string opc, list<dag> pattern>
  : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
-        opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> {
+        opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern>,
+    Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]> {
  bits<4> RdLo;
  bits<4> RdHi;
  bits<4> Rn;
@@ -556,7 +560,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc>
  : T2I<(outs rGPR:$RdLo, rGPR:$RdHi),
        (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
        opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-       RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> {
+       RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+       Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
  bits<4> RdLo;
  bits<4> RdHi;
  bits<4> Rn;
@@ -977,7 +982,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
                  PatFrag opnode> {
  def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
                   opc, ".w\t$Rt, $addr",
-                  [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+                  [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]>,
+            Sched<[WriteLd]> {
    bits<4> Rt;
    bits<17> addr;
    let Inst{31-25} = 0b1111100;
@@ -993,7 +999,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
  }
  def i8  : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
                   opc, "\t$Rt, $addr",
-                  [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+                  [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
+            Sched<[WriteLd]> {
    bits<4> Rt;
    bits<13> addr;
    let Inst{31-27} = 0b11111;
@@ -1015,7 +1022,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
  }
  def s   : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
                   opc, ".w\t$Rt, $addr",
-                  [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+                  [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]>,
+            Sched<[WriteLd]> {
    let Inst{31-27} = 0b11111;
    let Inst{26-25} = 0b00;
    let Inst{24} = signed;
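T2I_ld is instantiated once per load mnemonic and stamps out all the addressing forms together, the i12 (positive 12-bit offset), i8 (negative 8-bit offset), s (shifted register), and pci (literal) variants, so the Sched<[WriteLd]> added to each variant covers every user at once. Usage stays a one-liner per mnemonic; a sketch of the instantiation idiom, with the argument order assumed from the multiclass header shown above rather than copied from this hunk:

  // One defm yields the i12/i8/s/pci variants with their patterns:
  defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
                      GPR, UnOpFrag<(load node:$Src)>>;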
@@ -1039,7 +1047,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
// from the PC.
  def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
                  opc, ".w\t$Rt, $addr",
-                 [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+                 [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]>,
+            Sched<[WriteLd]> {
    let isReMaterializable = 1;
    let Inst{31-27} = 0b11111;
    let Inst{26-25} = 0b00;
@@ -1065,7 +1074,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
                  PatFrag opnode> {
  def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
                   opc, ".w\t$Rt, $addr",
-                  [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+                  [(opnode target:$Rt, t2addrmode_imm12:$addr)]>,
+            Sched<[WriteST]> {
    let Inst{31-27} = 0b11111;
    let Inst{26-23} = 0b0001;
    let Inst{22-21} = opcod;
@@ -1082,7 +1092,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
  }
  def i8  : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
                   opc, "\t$Rt, $addr",
-                  [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+                  [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
+            Sched<[WriteST]> {
    let Inst{31-27} = 0b11111;
    let Inst{26-23} = 0b0000;
    let Inst{22-21} = opcod;
@@ -1102,7 +1113,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
  }
  def s   : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
                   opc, ".w\t$Rt, $addr",
-                  [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+                  [(opnode target:$Rt, t2addrmode_so_reg:$addr)]>,
+            Sched<[WriteST]> {
    let Inst{31-27} = 0b11111;
    let Inst{26-23} = 0b0000;
    let Inst{22-21} = opcod;
@@ -1121,28 +1133,10 @@ multiclass T2I_st<bits<2> opcod, string opc,

/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
-  : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
-             opc, ".w\t$Rd, $Rm$rot",
-             [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
-    Requires<[IsThumb2]> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0100;
-  let Inst{22-20} = opcod;
-  let Inst{19-16} = 0b1111; // Rn
-  let Inst{15-12} = 0b1111;
-  let Inst{7} = 1;
-
-  bits<2> rot;
-  let Inst{5-4} = rot{1-0}; // rotate
-}
-
-// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
-class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
-  : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
-             IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
-             [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
-    Requires<[HasT2ExtractPack, IsThumb2]> {
+class T2I_ext_rrot_base<bits<3> opcod, dag iops, dag oops,
+                        string opc, string oprs,
+                        list<dag> pattern>
+  : T2TwoReg<iops, oops, IIC_iEXTr, opc, oprs, pattern> {
  bits<2> rot;
  let Inst{31-27} = 0b11111;
  let Inst{26-23} = 0b0100;
@@ -1150,46 +1144,34 @@ class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
  let Inst{19-16} = 0b1111; // Rn
  let Inst{15-12} = 0b1111;
  let Inst{7} = 1;
-  let Inst{5-4} = rot;
-}
-
-// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
-// supported yet.
-class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
-  : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
-             opc, "\t$Rd, $Rm$rot", []>,
-    Requires<[IsThumb2, HasT2ExtractPack]> {
-  bits<2> rot;
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0100;
-  let Inst{22-20} = opcod;
-  let Inst{19-16} = 0b1111; // Rn
-  let Inst{15-12} = 0b1111;
-  let Inst{7} = 1;
-  let Inst{5-4} = rot;
-}
+  let Inst{5-4} = rot; // rotate
+}
+
+class T2I_ext_rrot<bits<3> opcod, string opc>
+  : T2I_ext_rrot_base<opcod,
+                      (outs rGPR:$Rd),
+                      (ins rGPR:$Rm, rot_imm:$rot),
+                      opc, ".w\t$Rd, $Rm$rot", []>,
+    Requires<[IsThumb2]>,
+    Sched<[WriteALU, ReadALU]>;
+
+// UXTB16, SXTB16 - Requires HasDSP, does not need the .w qualifier.
+class T2I_ext_rrot_xtb16<bits<3> opcod, string opc>
+  : T2I_ext_rrot_base<opcod,
+                      (outs rGPR:$Rd),
+                      (ins rGPR:$Rm, rot_imm:$rot),
+                      opc, "\t$Rd, $Rm$rot", []>,
+    Requires<[HasDSP, IsThumb2]>,
+    Sched<[WriteALU, ReadALU]>;

/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+class T2I_exta_rrot<bits<3> opcod, string opc>
  : T2ThreeReg<(outs rGPR:$Rd),
               (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
-              IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
-              [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
-    Requires<[HasT2ExtractPack, IsThumb2]> {
-  bits<2> rot;
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0100;
-  let Inst{22-20} = opcod;
-  let Inst{15-12} = 0b1111;
-  let Inst{7} = 1;
-  let Inst{5-4} = rot;
-}
-
-class T2I_exta_rrot_np<bits<3> opcod, string opc>
-  : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
               IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
-    Requires<[HasT2ExtractPack, IsThumb2]> {
+    Requires<[HasDSP, IsThumb2]>,
+    Sched<[WriteALU, ReadALU]> {
  bits<2> rot;
  let Inst{31-27} = 0b11111;
  let Inst{26-23} = 0b0100;
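The rewrite above folds three near-identical encoding classes into one T2I_ext_rrot_base, leaving the thin subclasses to supply only the assembly string, predicates and scheduling, and replaces the old HasT2ExtractPack guard with HasDSP. Instantiation reduces to one line per instruction; a sketch, where the opcod values shown are the architectural bits 22-20 for sxtb/sxtb16 and are assumed rather than taken from this hunk:

  def t2SXTB   : T2I_ext_rrot<0b100, "sxtb">;          // plain Thumb-2
  def t2SXTB16 : T2I_ext_rrot_xtb16<0b010, "sxtb16">;  // gated on HasDSP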
@@ -1279,7 +1261,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8  : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
                        (ins t2addrmode_imm8s4:$addr),
-                       IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+                       IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+                Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// zextload i1 -> zextload i8
@@ -1333,17 +1316,20 @@ let mayLoad = 1, hasSideEffects = 0 in {
def t2LDR_PRE  : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins t2addrmode_imm8_pre:$addr),
                            AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
-                           "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+                           "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+                 Sched<[WriteLd]>;

def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
                            AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
-                           "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+                           "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+                 Sched<[WriteLd]>;

def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins t2addrmode_imm8_pre:$addr),
                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
-                           "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+                           "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+                 Sched<[WriteLd]>;

def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
@@ -1353,41 +1339,45 @@ def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins t2addrmode_imm8_pre:$addr),
                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
-                           "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+                           "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+                 Sched<[WriteLd]>;

def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
-                           "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+                           "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+                 Sched<[WriteLd]>;

def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins t2addrmode_imm8_pre:$addr),
                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
                            "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
-                           []>;
+                           []>, Sched<[WriteLd]>;

def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
-                           "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+                           "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+                 Sched<[WriteLd]>;

def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins t2addrmode_imm8_pre:$addr),
                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
                            "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
-                           []>;
+                           []>, Sched<[WriteLd]>;

def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                            (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
-                           "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+                           "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+                 Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0

// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
  : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
-         "\t$Rt, $addr", []> {
+         "\t$Rt, $addr", []>, Sched<[WriteLd]> {
  bits<4> Rt;
  bits<13> addr;
  let Inst{31-27} = 0b11111;
@@ -1431,11 +1421,14 @@ class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
}

def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
-                     (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+                     (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>,
+            Sched<[WriteLd]>;
def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
-                      (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+                      (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>,
+             Sched<[WriteLd]>;
def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
-                      (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+                      (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>,
+             Sched<[WriteLd]>;

// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>;
@@ -1448,7 +1441,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
                       (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
-              IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+              IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+              Sched<[WriteST]>;

// Indexed stores

@@ -1457,19 +1451,22 @@ def t2STR_PRE  : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
                            (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
                            AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
                            "str", "\t$Rt, $addr!",
-                           "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+                           "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+                 Sched<[WriteST]>;
GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; } // mayStore = 1, hasSideEffects = 0 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1480,7 +1477,8 @@ def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_store GPRnopc:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, addr_offset_none:$Rn, @@ -1490,7 +1488,8 @@ def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, addr_offset_none:$Rn, @@ -1500,7 +1499,8 @@ def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; // Pseudo-instructions for pattern matching the pre-indexed stores. 
We can't // put the patterns on the instruction definitions directly as ISel wants @@ -1513,17 +1513,20 @@ def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; } // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly @@ -1531,7 +1534,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT<bits<2> type, string opc, InstrItinClass ii> : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, - "\t$Rt, $addr", []> { + "\t$Rt, $addr", []>, Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = 0; // not signed @@ -1557,7 +1560,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; let mayLoad = 1 in def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { + "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>, + Sched<[WriteLd]> { let DecoderMethod = "DecodeT2LDRDPreInstruction"; } @@ -1565,13 +1569,13 @@ let mayLoad = 1 in def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", - "$addr.base = $wb", []>; + "$addr.base = $wb", []>, Sched<[WriteLd]>; let mayStore = 1 in def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", - "$addr.base = $wb", []> { + "$addr.base = $wb", []>, Sched<[WriteST]> { let DecoderMethod = "DecodeT2STRDPreInstruction"; } @@ -1580,12 +1584,13 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", - "$addr.base = $wb", []>; + "$addr.base = $wb", []>, Sched<[WriteST]>; class T2Istrrel<bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, - asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> { + asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]>, + Sched<[WriteST]> { bits<4> Rt; bits<4> addr; @@ -1861,7 +1866,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; // let hasSideEffects = 0 in -def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, +def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr, "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -1870,11 +1875,11 @@ def t2MOVr : T2sTwoReg<(outs 
GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, let Inst{14-12} = 0b000; let Inst{7-4} = 0b0000; } -def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, +def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, zero_reg)>; -def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, +def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, CPSR)>; -def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, +def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, CPSR)>; // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. @@ -1926,10 +1931,11 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, def : InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; let Constraints = "$src = $Rd" in { def t2MOVTi16 : T2I<(outs rGPR:$Rd), @@ -1969,31 +1975,43 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; // Sign extenders -def t2SXTB : T2I_ext_rrot<0b100, "sxtb", - UnOpFrag<(sext_inreg node:$Src, i8)>>; -def t2SXTH : T2I_ext_rrot<0b000, "sxth", - UnOpFrag<(sext_inreg node:$Src, i16)>>; -def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; +def t2SXTB : T2I_ext_rrot<0b100, "sxtb">; +def t2SXTH : T2I_ext_rrot<0b000, "sxth">; +def t2SXTB16 : T2I_ext_rrot_xtb16<0b010, "sxtb16">; + +def t2SXTAB : T2I_exta_rrot<0b100, "sxtab">; +def t2SXTAH : T2I_exta_rrot<0b000, "sxtah">; +def t2SXTAB16 : T2I_exta_rrot<0b010, "sxtab16">; + +def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i8), + (t2SXTB rGPR:$Rn, rot_imm:$rot)>; +def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i16), + (t2SXTH rGPR:$Rn, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, + (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, + (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn), + (t2SXTB16 rGPR:$Rn, 0)>; +def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm), + (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; -def t2SXTAB : T2I_exta_rrot<0b100, "sxtab", - BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", - BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; // A simple right-shift can also be used in most cases (the exception is the // SXTH operations with a rotate of 24: there the non-contiguous bits are // relevant). 
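As a concrete model of the comment above -- a minimal C sketch of what these patterns compute, with ror32/sxtah as illustrative helper names rather than anything from the tree:

#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned r) {
    r &= 31;
    return r ? (x >> r) | (x << (32 - r)) : x;
}

/* Model of SXTAH: rotate $Rm, sign-extend its low halfword, accumulate. */
static int32_t sxtah(int32_t rn, uint32_t rm, unsigned rot) {
    return rn + (int16_t)ror32(rm, rot);
}

For rot = 8 or 16 the low halfword of ror32(rm, rot) equals the low halfword of rm >> rot, so the srl-based patterns that follow can map onto the same instruction; for rot = 24 the rotated halfword is assembled from bits 7:0 and 31:24 of rm -- non-contiguous, as the comment says -- and a plain shift cannot produce it.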
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (rotr rGPR:$Rm, (i32 24)), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (or (srl rGPR:$Rm, (i32 24)), (shl rGPR:$Rm, (i32 8))), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>; @@ -2001,12 +2019,19 @@ def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg // Zero extenders let AddedComplexity = 16 in { -def t2UXTB : T2I_ext_rrot<0b101, "uxtb", - UnOpFrag<(and node:$Src, 0x000000FF)>>; -def t2UXTH : T2I_ext_rrot<0b001, "uxth", - UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", - UnOpFrag<(and node:$Src, 0x00FF00FF)>>; +def t2UXTB : T2I_ext_rrot<0b101, "uxtb">; +def t2UXTH : T2I_ext_rrot<0b001, "uxth">; +def t2UXTB16 : T2I_ext_rrot_xtb16<0b011, "uxtb16">; + +def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x000000FF), + (t2UXTB rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF), + (t2UXTH rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF), + (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>; + +def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm), + (t2UXTB16 rGPR:$Rm, 0)>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. // The transformation should probably be done as a combiner action @@ -2014,23 +2039,29 @@ def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", // eight bits of the source into the lower eight bits of the result. 
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), // (t2UXTB16 rGPR:$Src, 3)>, -// Requires<[HasT2ExtractPack, IsThumb2]>; +// Requires<[HasDSP, IsThumb2]>; def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16 rGPR:$Src, 1)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; -def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; -def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; +def t2UXTAB : T2I_exta_rrot<0b101, "uxtab">; +def t2UXTAH : T2I_exta_rrot<0b001, "uxtah">; +def t2UXTAB16 : T2I_exta_rrot<0b011, "uxtab16">; -def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), +def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot), + 0x00FF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot), + 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), +def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm), + (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; } @@ -2060,6 +2091,19 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>; } +def : t2InstSubst<"adc${s}${p} $rd, $rn, $imm", + (t2SBCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm", + (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; + +def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm", + (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"addw${p} $rd, $rn, $imm", + (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; +def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm", + (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"subw${p} $rd, $rn, $imm", + (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>; @@ -2102,10 +2146,9 @@ def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; -// Select Bytes -- for disassembly only - def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "sel", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; @@ -2119,9 +2162,7 @@ def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, - list<dag> pat = [/* For disassembly only; pattern left blank */], - dag iops = (ins rGPR:$Rn, rGPR:$Rm), - string asm = "\t$Rd, $Rn, $Rm"> + list<dag> pat, dag iops, string asm> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; @@ -2139,60 +2180,72 @@ class 
T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, let Inst{3-0} = Rm; } -// Saturating add/subtract -- for disassembly only - -def t2QADD : T2I_pam<0b000, 0b1000, "qadd", - [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; -def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; -def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; -def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", - [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; -def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; -def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; -def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; -def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; -def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; -def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; -def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; - -// Signed/Unsigned add/subtract -- for disassembly only - -def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; -def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; -def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; -def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; -def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; -def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; -def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; -def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; -def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; -def t2USAX : T2I_pam<0b110, 0b0100, "usax">; -def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; -def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; - -// Signed/Unsigned halving add/subtract -- for disassembly only - -def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; -def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; -def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; -def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; -def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; -def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; -def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; -def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; -def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; -def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; -def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; -def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; +class T2I_pam_intrinsics<bits<3> op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> + : T2I_pam<op22_20, op7_4, opc, + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))], + (ins rGPR:$Rn, rGPR:$Rm), "\t$Rd, $Rn, $Rm">; + +class T2I_pam_intrinsics_rev<bits<3> op22_20, bits<4> op7_4, string opc> + : T2I_pam<op22_20, op7_4, opc, [], + (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; + +// Saturating add/subtract +def t2QADD16 : T2I_pam_intrinsics<0b001, 0b0001, "qadd16", int_arm_qadd16>; +def t2QADD8 : T2I_pam_intrinsics<0b000, 0b0001, "qadd8", int_arm_qadd8>; +def t2QASX : T2I_pam_intrinsics<0b010, 0b0001, "qasx", int_arm_qasx>; +def t2UQSUB8 : T2I_pam_intrinsics<0b100, 0b0101, "uqsub8", int_arm_uqsub8>; +def t2QSAX : T2I_pam_intrinsics<0b110, 0b0001, "qsax", int_arm_qsax>; +def t2QSUB16 : T2I_pam_intrinsics<0b101, 0b0001, "qsub16", int_arm_qsub16>; +def t2QSUB8 : T2I_pam_intrinsics<0b100, 0b0001, "qsub8", int_arm_qsub8>; +def t2UQADD16 : T2I_pam_intrinsics<0b001, 0b0101, "uqadd16", int_arm_uqadd16>; +def t2UQADD8 : 
T2I_pam_intrinsics<0b000, 0b0101, "uqadd8", int_arm_uqadd8>; +def t2UQASX : T2I_pam_intrinsics<0b010, 0b0101, "uqasx", int_arm_uqasx>; +def t2UQSAX : T2I_pam_intrinsics<0b110, 0b0101, "uqsax", int_arm_uqsax>; +def t2UQSUB16 : T2I_pam_intrinsics<0b101, 0b0101, "uqsub16", int_arm_uqsub16>; +def t2QADD : T2I_pam_intrinsics_rev<0b000, 0b1000, "qadd">; +def t2QSUB : T2I_pam_intrinsics_rev<0b000, 0b1010, "qsub">; +def t2QDADD : T2I_pam_intrinsics_rev<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam_intrinsics_rev<0b000, 0b1011, "qdsub">; + +def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; + +// Signed/Unsigned add/subtract + +def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; +def t2SADD16 : T2I_pam_intrinsics<0b001, 0b0000, "sadd16", int_arm_sadd16>; +def t2SADD8 : T2I_pam_intrinsics<0b000, 0b0000, "sadd8", int_arm_sadd8>; +def t2SSAX : T2I_pam_intrinsics<0b110, 0b0000, "ssax", int_arm_ssax>; +def t2SSUB16 : T2I_pam_intrinsics<0b101, 0b0000, "ssub16", int_arm_ssub16>; +def t2SSUB8 : T2I_pam_intrinsics<0b100, 0b0000, "ssub8", int_arm_ssub8>; +def t2UASX : T2I_pam_intrinsics<0b010, 0b0100, "uasx", int_arm_uasx>; +def t2UADD16 : T2I_pam_intrinsics<0b001, 0b0100, "uadd16", int_arm_uadd16>; +def t2UADD8 : T2I_pam_intrinsics<0b000, 0b0100, "uadd8", int_arm_uadd8>; +def t2USAX : T2I_pam_intrinsics<0b110, 0b0100, "usax", int_arm_usax>; +def t2USUB16 : T2I_pam_intrinsics<0b101, 0b0100, "usub16", int_arm_usub16>; +def t2USUB8 : T2I_pam_intrinsics<0b100, 0b0100, "usub8", int_arm_usub8>; + +// Signed/Unsigned halving add/subtract + +def t2SHASX : T2I_pam_intrinsics<0b010, 0b0010, "shasx", int_arm_shasx>; +def t2SHADD16 : T2I_pam_intrinsics<0b001, 0b0010, "shadd16", int_arm_shadd16>; +def t2SHADD8 : T2I_pam_intrinsics<0b000, 0b0010, "shadd8", int_arm_shadd8>; +def t2SHSAX : T2I_pam_intrinsics<0b110, 0b0010, "shsax", int_arm_shsax>; +def t2SHSUB16 : T2I_pam_intrinsics<0b101, 0b0010, "shsub16", int_arm_shsub16>; +def t2SHSUB8 : T2I_pam_intrinsics<0b100, 0b0010, "shsub8", int_arm_shsub8>; +def t2UHASX : T2I_pam_intrinsics<0b010, 0b0110, "uhasx", int_arm_uhasx>; +def t2UHADD16 : T2I_pam_intrinsics<0b001, 0b0110, "uhadd16", int_arm_uhadd16>; +def t2UHADD8 : T2I_pam_intrinsics<0b000, 0b0110, "uhadd8", int_arm_uhadd8>; +def t2UHSAX : T2I_pam_intrinsics<0b110, 0b0110, "uhsax", int_arm_uhsax>; +def t2UHSUB16 : T2I_pam_intrinsics<0b101, 0b0110, "uhsub16", int_arm_uhsub16>; +def t2UHSUB8 : T2I_pam_intrinsics<0b100, 0b0110, "uhsub8", int_arm_uhsub8>; // Helper class for disassembly only // A6.3.16 & A6.3.17 @@ -2220,96 +2273,94 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, // Unsigned Sum of Absolute Differences [and Accumulate]. 
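The two defs below pick up selection patterns for int_arm_usad8 / int_arm_usada8. As a sketch of the semantics being matched (plain C, illustrative only):

#include <stdint.h>
#include <stdlib.h>

/* Model of USAD8: sum of absolute differences over the four byte lanes. */
static uint32_t usad8(uint32_t rn, uint32_t rm) {
    uint32_t sum = 0;
    for (int i = 0; i < 4; i++) {
        int a = (rn >> (8 * i)) & 0xFF;
        int b = (rm >> (8 * i)) & 0xFF;
        sum += (uint32_t)abs(a - b);
    }
    return sum;
}

/* USADA8 is the same reduction plus the accumulator operand $Ra. */
static uint32_t usada8(uint32_t rn, uint32_t rm, uint32_t ra) {
    return ra + usad8(rn, rm);
}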
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (int_arm_usad8 rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, - "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, + "usada8", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (int_arm_usada8 rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. -class T2SatI<dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { +let hasSideEffects = 1 in +class T2SatI<dag iops, string opc, string asm> + : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> { bits<4> Rd; bits<4> Rn; bits<5> sat_imm; - bits<7> sh; + bits<6> sh; - let Inst{11-8} = Rd; + let Inst{31-24} = 0b11110011; + let Inst{21} = sh{5}; + let Inst{20} = 0; let Inst{19-16} = Rn; - let Inst{4-0} = sat_imm; - let Inst{21} = sh{5}; + let Inst{15} = 0; let Inst{14-12} = sh{4-2}; - let Inst{7-6} = sh{1-0}; + let Inst{11-8} = Rd; + let Inst{7-6} = sh{1-0}; + let Inst{5} = 0; + let Inst{4-0} = sat_imm; } -def t2SSAT: T2SatI< - (outs rGPR:$Rd), - (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), - NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; +def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + "ssat", "\t$Rd, $sat_imm, $Rn$sh">, + Requires<[IsThumb2]> { + let Inst{23-22} = 0b00; let Inst{5} = 0; } -def t2SSAT16: T2SatI< - (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary, - "ssat16", "\t$Rd, $sat_imm, $Rn", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' - let Inst{14-12} = 0b000; // imm3 = '000' - let Inst{7-6} = 0b00; // imm2 = '00' - let Inst{5-4} = 0b00; +def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn), + "ssat16", "\t$Rd, $sat_imm, $Rn">, + Requires<[IsThumb2, HasDSP]> { + let Inst{23-22} = 0b00; + let sh = 0b100000; + let Inst{4} = 0; } -def t2USAT: T2SatI< - (outs rGPR:$Rd), - (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), - NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; - let Inst{20} = 0; - let Inst{15} = 0; +def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + "usat", "\t$Rd, $sat_imm, $Rn$sh">, + Requires<[IsThumb2]> { + let Inst{23-22} = 0b10; } -def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), - NoItinerary, - "usat16", "\t$Rd, $sat_imm, $Rn", []>, +def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), + "usat16", "\t$Rd, $sat_imm, $Rn">, Requires<[IsThumb2, HasDSP]> { - let Inst{31-22} = 0b1111001110; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' - let Inst{14-12} = 0b000; // imm3 = '000' - let Inst{7-6} = 0b00; // imm2 = '00' - let Inst{5-4} = 0b00; + let Inst{23-22} = 0b10; + let sh = 0b100000; + let Inst{4} = 0; } -def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; -def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT 
imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), + (t2USAT imm0_31:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos), + (t2SSAT16 imm1_16:$pos, GPR:$a)>; +def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos), + (t2USAT16 imm0_15:$pos, GPR:$a)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. // -defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>; +defm t2LSL : T2I_sh_ir<0b00, "lsl", imm1_31, shl>; defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>; defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>; defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>; +// LSL #0 is actually MOV, and has slightly different permitted registers to +// LSL with non-zero shift +def : t2InstAlias<"lsl${s}${p} $Rd, $Rm, #0", + (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"lsl${s}${p}.w $Rd, $Rm, #0", + (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; + // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), (t2RORrr rGPR:$lhs, rGPR:$rhs)>; @@ -2547,7 +2598,8 @@ def : T2Pat<(t2_so_imm_not:$src), let isCommutable = 1 in def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2558,7 +2610,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, UseMulOps]> { + Requires<[IsThumb2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2575,8 +2628,12 @@ def t2MLS: T2FourRegMLA<0b0001, "mls", // Extra precision multiplies with low / high results let hasSideEffects = 0 in { let isCommutable = 1 in { -def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>; -def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>; +def t2SMULL : T2MulLong<0b000, 0b0000, "smull", + [(set rGPR:$RdLo, rGPR:$RdHi, + (smullohi rGPR:$Rn, rGPR:$Rm))]>; +def t2UMULL : T2MulLong<0b010, 0b0000, "umull", + [(set rGPR:$RdLo, rGPR:$RdHi, + (umullohi rGPR:$Rn, rGPR:$Rm))]>; } // isCommutable // Multiply + accumulate @@ -2592,7 +2649,8 @@ class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, opc, "\t$Rd, $Rn, $Rm", pattern>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2607,7 +2665,8 @@ class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2624,7 +2683,8 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> 
pattern> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm", pattern>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2645,8 +2705,10 @@ def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb", def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt", [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16))))]>; -def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>; -def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>; +def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", + [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>; +def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", + [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>; def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn), (t2SMULBB rGPR:$Rm, rGPR:$Rn)>; @@ -2654,12 +2716,25 @@ def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm), + (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm), + (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultb rGPR:$Rn, rGPR:$Rm), + (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultt rGPR:$Rn, rGPR:$Rm), + (t2SMULTT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwb rGPR:$Rn, rGPR:$Rm), + (t2SMULWB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwt rGPR:$Rn, rGPR:$Rm), + (t2SMULWT rGPR:$Rn, rGPR:$Rm)>; class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2680,8 +2755,10 @@ def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb", def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt", [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>; -def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>; -def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>; +def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", + [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>; +def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", + [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwt rGPR:$Rn, rGPR:$Rm)))]>; def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)), (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; @@ -2692,58 +2769,93 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; -class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> - : T2FourReg_mac<1, op22_20, op7_4, - (outs rGPR:$Ra, rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; +def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : 
Thumb2DSPPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; // Halfword multiple accumulate long: SMLAL<x><y> -def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>; -def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>; -def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>; -def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>; - -class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc> +def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">, + Requires<[IsThumb2, HasDSP]>; + +def : Thumb2DSPPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALBB $Rn, $Rm, $RLo, $RHi)>; +def : Thumb2DSPPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALBT $Rn, $Rm, $RLo, $RHi)>; +def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALTB $Rn, $Rm, $RLo, $RHi)>; +def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>; + +class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> : T2ThreeReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))]>, + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{15-12} = 0b1111; } // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">; -def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">; -def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">; -def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">; +def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad", int_arm_smuad>; +def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx", int_arm_smuadx>; +def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd", int_arm_smusd>; +def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx", int_arm_smusdx>; -class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc> +class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> : T2FourReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), - IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>, + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; -def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">; -def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">; -def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">; -def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">; +def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad", int_arm_smlad>; +def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx", int_arm_smladx>; +def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd", int_arm_smlsd>; +def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx", int_arm_smlsdx>; class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc> : T2FourReg_mac<1, op22_20, op7_4, (outs rGPR:$Ra, rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), + 
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; + RegConstraint<"$Ra = $RLo, $Rd = $RHi">, + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">; def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">; def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">; def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; +def : Thumb2DSPPat<(ARMSmlald rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLALD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlaldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLALDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlsld rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLSLD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlsldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLSLDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; + //===----------------------------------------------------------------------===// // Division Instructions. // Signed and unsigned division on v7-M @@ -2751,7 +2863,8 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]> { + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, + Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -2762,7 +2875,8 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]> { + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, + Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -2819,7 +2933,7 @@ def t2PKHBT : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), (and (shl rGPR:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, - Requires<[HasT2ExtractPack, IsThumb2]>, + Requires<[HasDSP, IsThumb2]>, Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2835,10 +2949,10 @@ def t2PKHBT : T2ThreeReg< // Alternate cases for PKHBT where identities eliminate some nodes. def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. @@ -2848,7 +2962,7 @@ def t2PKHTB : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), (and (sra rGPR:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, - Requires<[HasT2ExtractPack, IsThumb2]>, + Requires<[HasDSP, IsThumb2]>, Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2867,14 +2981,14 @@ def t2PKHTB : T2ThreeReg< // pkhtb src1, src2, asr (17..31). 
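The PKHBT/PKHTB patterns in this hunk (now gated on HasDSP rather than HasT2ExtractPack) match exactly the pack-halfword semantics. A minimal C model, with pkhbt/pkhtb as illustrative names and assuming sh < 32 and an arithmetic >> on signed ints, as GCC/Clang provide:

#include <stdint.h>

/* Model of PKHBT: bottom halfword of $Rn, top halfword of ($Rm << sh). */
static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned sh) {
    return (rn & 0x0000FFFFu) | ((rm << sh) & 0xFFFF0000u);
}

/* Model of PKHTB: top halfword of $Rn, bottom halfword of ($Rm asr sh).
 * For shifts of 1-15, no sign-filled bits reach the low halfword, so a
 * logical and an arithmetic shift are indistinguishable under the 0xFFFF
 * mask -- that is why the srl-based pattern noted above is also valid. */
static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned sh) {
    return (rn & 0xFFFF0000u) | ((uint32_t)((int32_t)rm >> sh) & 0x0000FFFFu);
}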
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; //===----------------------------------------------------------------------===// // CRC32 Instructions @@ -3380,7 +3494,8 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br, let AsmMatchConverter = "cvtThumbBranches"; } -let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { +let Size = 4, isNotDuplicable = 1, isBranch = 1, isTerminator = 1, + isBarrier = 1, isIndirectBranch = 1 in { // available in both v8-M.Baseline and Thumb2 targets def t2BR_JT : t2basePseudoInst<(outs), @@ -4216,13 +4331,13 @@ def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>, def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>, Requires<[IsThumb2]>; def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)), (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; } def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>, @@ -4231,10 +4346,10 @@ def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>, Requires<[IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)), (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; // Atomic load/store patterns def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr), @@ -4325,26 +4440,26 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", pred:$p, cc_out:$s)>; // add w/ negative immediates is just a sub. 
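The comment above is the whole justification for the t2InstAlias -> t2InstSubst conversions that follow: when an add immediate has no valid encoding, the assembler may rewrite it as the equivalent sub. The identity, spelled out as a runnable C check (and note SUBS computes rn + ~imm + 1, so even the carry out matches):

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t rn = 100;
    /* add rd, rn, #-3 == sub rd, rn, #3: under 32-bit modular arithmetic
     * rn + (-imm) and rn - imm are the same value. */
    assert(rn + (uint32_t)-3 == rn - 3u);
    return 0;
}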
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"add${p} $Rd, $Rn, $imm", +def : t2InstSubst<"add${p} $Rd, $Rn, $imm", (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; -def : t2InstAlias<"add${s}${p} $Rdn, $imm", +def : t2InstSubst<"add${s}${p} $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"add${p} $Rdn, $imm", +def : t2InstSubst<"add${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; -def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm", +def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"addw${p} $Rd, $Rn, $imm", +def : t2InstSubst<"addw${p} $Rd, $Rn, $imm", (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; -def : t2InstAlias<"add${s}${p}.w $Rdn, $imm", +def : t2InstSubst<"add${s}${p}.w $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"addw${p} $Rdn, $imm", +def : t2InstSubst<"addw${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; @@ -4431,10 +4546,10 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", // input operands swapped when the shift amount is zero (i.e., unspecified). def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", (t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; // PUSH/POP aliases for STM/LDM def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; @@ -4513,16 +4628,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr", // Extend instruction optional rotate operand. 
def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm", (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm", (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"sxtb16${p} $Rd, $Rm", (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"sxtb${p} $Rd, $Rm", (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; @@ -4535,16 +4650,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm", def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm", (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm", (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"uxtb16${p} $Rd, $Rm", (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"uxtb${p} $Rd, $Rm", (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; @@ -4560,7 +4675,7 @@ def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot", (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : InstAlias<"uxtb16${p} $Rd, $Rm$rot", (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; @@ -4568,41 +4683,54 @@ def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot", (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : InstAlias<"sxtb16${p} $Rd, $Rm$rot", (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; // "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like // for isel. 
-def : t2InstAlias<"mov${p} $Rd, $imm", +def : t2InstSubst<"mov${p} $Rd, $imm", (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; -def : t2InstAlias<"mvn${p} $Rd, $imm", - (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +def : t2InstSubst<"mvn${s}${p} $Rd, $imm", + (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; // Same for AND <--> BIC -def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"bic${s}${p} $Rd, $Rn, $imm", (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"bic${s}${p} $Rdn, $imm", +def : t2InstSubst<"bic${s}${p} $Rdn, $imm", (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm", (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"and${s}${p} $Rdn, $imm", +def : t2InstSubst<"and${s}${p} $Rdn, $imm", (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; +// And ORR <--> ORN +def : t2InstSubst<"orn${s}${p} $Rd, $Rn, $imm", + (t2ORRri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orn${s}${p} $Rdn, $imm", + (t2ORRri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orr${s}${p} $Rd, $Rn, $imm", + (t2ORNri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orr${s}${p} $Rdn, $imm", + (t2ORNri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub -def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"add${s}${p} $Rd, $imm", +def : t2InstSubst<"add${s}${p} $Rd, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; // Same for CMP <--> CMN via t2_so_imm_neg -def : t2InstAlias<"cmp${p} $Rd, $imm", +def : t2InstSubst<"cmp${p} $Rd, $imm", (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; -def : t2InstAlias<"cmn${p} $Rd, $imm", +def : t2InstSubst<"cmn${p} $Rd, $imm", (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; @@ -4616,6 +4744,8 @@ def : t2InstAlias<"neg${s}${p} $Rd, $Rm", // MOV so_reg assembler pseudos. InstAlias isn't expressive enough for // these, unfortunately. +// FIXME: LSL #0 in the shift should allow SP to be used as either the +// source or destination (but not both). 
def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", @@ -4626,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +// Aliases for the above with the .w qualifier +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + // ADR w/o the .w suffix def : t2InstAlias<"adr${p} $Rd, $addr", (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; @@ -4659,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", // Pseudo instruction ldr Rt, =immediate def t2LDRConstPool : t2AsmPseudo<"ldr${p} $Rt, $immediate", - (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; // Version w/ the .w suffix. def : t2InstAlias<"ldr${p}.w $Rt, $immediate", (t2LDRConstPool GPRnopc:$Rt, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index e990486..5d887c4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -11,14 +11,17 @@ // //===----------------------------------------------------------------------===// -def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, + SDTCisVT<2, f64>]>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; +def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; //===----------------------------------------------------------------------===// // Operand Definitions. @@ -336,13 +339,15 @@ let TwoOperandAliasConstraint = "$Dn = $Dd" in def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPALU32]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -352,19 +357,22 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in def VADDH : AHbI<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPALU32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPALU32]>{ // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -374,37 +382,43 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in def VSUBH : AHbI<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPALU32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPDIV64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; + [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVH : AHbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -414,17 +428,20 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in def VMULH : AHbI<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>; + [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>, + Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { + [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -433,7 +450,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, def VNMULH : AHbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; multiclass vsel_inst<string op, bits<2> opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", @@ -501,12 +519,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -517,17 +535,15 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", []>; - -// FIXME: Verify encoding after integrated assembler is working. def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", - [/* For disassembly only; pattern left blank */]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -566,7 +582,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd))]> { + [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -574,7 +590,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd)]> { + [(arm_cmpfp0 SPR:$Sd, (i32 1))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -591,11 +607,10 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, let Inst{5} = 0; } -// FIXME: Verify encoding after integrated assembler is working. 
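Earlier in this file the compare nodes grew an i32 operand so one SDNode covers both compare flavors; the hunk continuing below uses it to turn the previously disassembly-only VCMPZ* defs into real patterns with the flag at 0, while the 'e' variants pass 1. The difference is exception behavior on NaN: vcmpe raises Invalid Operation on any NaN, vcmp only on signaling NaNs. C can model the distinction where the implementation honors IEEE/Annex F exception semantics (a sketch; results vary with compiler and optimization flags):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    volatile double a = NAN, b = 1.0;

    feclearexcept(FE_ALL_EXCEPT);
    int q = isgreater(a, b);          /* quiet compare: models vcmp  (flag 0) */
    int quiet_inv = fetestexcept(FE_INVALID) != 0;

    feclearexcept(FE_ALL_EXCEPT);
    int s = (a > b);                  /* ordinary >: models vcmpe (flag 1),
                                       * which raises Invalid on any NaN */
    int sig_inv = fetestexcept(FE_INVALID) != 0;

    printf("quiet=%d (invalid %d), signaling=%d (invalid %d)\n",
           q, quiet_inv, s, sig_inv);
    return 0;
}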
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -603,7 +618,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfp0 SPR:$Sd, (i32 0))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", - [(set DPR:$Dd, (fpextend SPR:$Sm))]> { + [(set DPR:$Dd, (fpextend SPR:$Sm))]>, + Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Dd; bits<5> Sm; @@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (fpround DPR:$Dm))]> { + [(set SPR:$Sd, (fpround DPR:$Dm))]>, + Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Sd; bits<5> Dm; @@ -663,31 +680,35 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half, single and double-precision. For disassembly only. -// FIXME: Verify encoding after integrated assembler is working. def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasFPARMv8, HasDPVFP]> { + []>, Requires<[HasFPARMv8, HasDPVFP]>, + Sched<[WriteFPCVT]> { // Instruction operands. 
bits<5> Sm; @@ -946,12 +967,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", - [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, + Sched<[WriteFPSQRT64]>; def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", - [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; + [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, + Sched<[WriteFPSQRT32]>; def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -987,7 +1010,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0, def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$Rt), (ins SPR:$Sn), IIC_fpMOVSI, "vmov", "\t$Rt, $Sn", - [(set GPR:$Rt, (bitconvert SPR:$Sn))]> { + [(set GPR:$Rt, (bitconvert SPR:$Sn))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<4> Rt; bits<5> Sn; @@ -1010,7 +1034,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, - Requires<[HasVFP2, UseVMOVSR]> { + Requires<[HasVFP2, UseVMOVSR]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Sn; bits<4> Rt; @@ -1032,7 +1057,8 @@ let hasSideEffects = 0 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", - [/* FIXME: Can't write pattern for multiple result instr*/]> { + [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Dm; bits<4> Rt; @@ -1059,7 +1085,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteFPMOV]> { bits<5> src1; bits<4> Rt; bits<4> Rt2; @@ -1085,7 +1112,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2), IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2", - [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> { + [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Dm; bits<4> Rt; @@ -1128,7 +1156,8 @@ let hasSideEffects = 0 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> dst1; bits<4> src1; @@ -1154,7 +1183,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001, (outs GPR:$Rt), (ins SPR:$Sn), IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn", []>, - Requires<[HasFullFP16]> { + Requires<[HasFullFP16]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<4> Rt; bits<5> Sn; @@ -1173,7 +1203,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt", []>, - Requires<[HasFullFP16]> { + Requires<[HasFullFP16]>, + Sched<[WriteFPMOV]> { // Instruction operands. 
bits<5> Sn; bits<4> Rt; @@ -1254,7 +1285,8 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 } @@ -1269,7 +1301,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -1286,14 +1319,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 } @@ -1308,7 +1343,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -1325,7 +1361,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 } @@ -1390,7 +1427,8 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1405,7 +1443,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1423,14 +1462,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1445,7 +1486,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1463,52 +1505,58 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // 
Z bit } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { -// FIXME: Verify encoding after integrated assembler is working. def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{ + [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> { + [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{ + [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { + [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } } @@ -1528,8 +1576,7 @@ let Constraints = "$a = $dst" in { class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, - Sched<[WriteCvtFP]> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; @@ -1540,8 +1587,7 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, - Sched<[WriteCvtFP]> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; @@ -1553,26 +1599,31 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>, - 
Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1604,45 +1655,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; // Fixed-Point to FP: def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
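// Note on the Sched<[WriteFPCVT]> annotations added throughout this hunk:
// WriteFPCVT is an abstract SchedWrite class; each subtarget's scheduling
// model binds it to concrete resources and latencies. An illustrative
// binding only (the model and unit names here are made up):
//
//   let SchedModel = SomeCPUModel in
//   def : WriteRes<WriteFPCVT, [SomeCPUFPUnit]> { let Latency = 4; }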
let D = VFPNeonA8Domain; @@ -1650,7 +1710,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1658,7 +1719,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1666,7 +1728,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1674,19 +1737,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; } // End of 'let Constraints = "$a = $dst" in' @@ -1700,7 +1767,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1708,7 +1776,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1734,7 +1803,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1742,7 +1812,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1768,7 +1839,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1776,7 +1848,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1802,14 +1875,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1838,7 +1913,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1846,7 +1922,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1856,7 +1933,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0, IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -1880,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1888,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1898,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0, IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -1929,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1937,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
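// Note: the Sched lists on these MAC definitions name one Read class per
// source operand, in operand order: ReadFPMAC for the tied accumulator
// ($Sdin/$Ddin) and ReadFPMUL for each multiplicand. A subtarget model can
// then forward the accumulator late, e.g. (illustrative):
//
//   def : ReadAdvance<ReadFPMAC, 2>;  // accumulator read 2 cycles late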
} @@ -1947,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0, IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -1978,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1995,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0, IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 2bdbe4f..faed6b8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -11,25 +11,112 @@ /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// -#include "ARMInstructionSelector.h" #include "ARMRegisterBankInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "arm-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL #error "You shouldn't build this" #endif -ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI, +namespace { + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +class ARMInstructionSelector : public InstructionSelector { +public: + ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); + + bool select(MachineInstr &I) const override; + +private: + bool selectImpl(MachineInstr &I) const; + + struct CmpConstants; + struct InsertInfo; + + bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const; + + // Helper for inserting a comparison sequence that sets \p ResReg to either 1 + // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or + // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS). + bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg, + ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const; + + // Set \p DestReg to \p Constant. 
+ void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const; + + bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + + // Check if the types match and both operands have the expected size and + // register bank. + bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS, + unsigned ExpectedSize, unsigned ExpectedRegBankID) const; + + // Check if the register has the expected size and register bank. + bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize, + unsigned ExpectedRegBankID) const; + + const ARMBaseInstrInfo &TII; + const ARMBaseRegisterInfo &TRI; + const ARMBaseTargetMachine &TM; + const ARMRegisterBankInfo &RBI; + const ARMSubtarget &STI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +// We declare the temporaries used by selectImpl() in the class to minimize the +// cost of constructing placeholder values. +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; +} // end anonymous namespace + +namespace llvm { +InstructionSelector * +createARMInstructionSelector(const ARMBaseTargetMachine &TM, + const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI) { + return new ARMInstructionSelector(TM, STI, RBI); +} +} + +unsigned zero_reg = 0; + +#define GET_GLOBALISEL_IMPL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM, + const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI) : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} + TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, @@ -43,20 +130,21 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, assert(RegBank && "Can't get reg bank for virtual register"); const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - (void)DstSize; - unsigned SrcReg = I.getOperand(1).getReg(); - const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); - (void)SrcSize; - assert((DstSize == SrcSize || - // Copies are a means to setup initial types, the number of - // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= SrcSize)) && - "Copy with different width?!"); - - assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank"); + assert((RegBank->getID() == ARM::GPRRegBankID || + RegBank->getID() == ARM::FPRRegBankID) && + "Unsupported reg bank"); + const TargetRegisterClass *RC = &ARM::GPRRegClass; + if (RegBank->getID() == ARM::FPRRegBankID) { + if (DstSize == 32) + RC = &ARM::SPRRegClass; + else if (DstSize == 64) + RC = &ARM::DPRRegClass; + else + llvm_unreachable("Unsupported destination size"); + } + // No need to constrain SrcReg. It will get constrained when // we hit another of its uses or its defs. // Copies do not have constraints. 
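Two idioms recur throughout the rewritten selector: .add(predOps(ARMCC::AL))
appends the two "always execute" predicate operands that AddDefaultPred used
to supply, and .add(condCodeOp()) appends the optional-def flag operand (no
CPSR update) that AddDefaultCC used to supply, as the G_GEP and G_FRAME_INDEX
hunks further down show. The bank-to-class decision in selectCopy above is
simple enough to model standalone; this is an illustrative sketch with
stand-in types, not LLVM API:

// Minimal model of selectCopy's register-class choice.
enum class Bank { GPR, FPR };

// Returns the class the copy's destination would be constrained to, or
// nullptr for unsupported FPR sizes (the real code hits llvm_unreachable).
inline const char *chooseRegClass(Bank B, unsigned DstSizeInBits) {
  if (B == Bank::GPR)
    return "GPRRegClass";   // 32-bit core registers
  if (DstSizeInBits == 32)
    return "SPRRegClass";   // single-precision VFP registers
  if (DstSizeInBits == 64)
    return "DPRRegClass";   // double-precision VFP registers
  return nullptr;           // unsupported destination size
}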
@@ -68,6 +156,375 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } +static bool selectMergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP"); + + // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs + // into one DPR. + unsigned VReg0 = MIB->getOperand(0).getReg(); + (void)VReg0; + assert(MRI.getType(VReg0).getSizeInBits() == 64 && + RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_MERGE_VALUES"); + unsigned VReg1 = MIB->getOperand(1).getReg(); + (void)VReg1; + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_MERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 32 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_MERGE_VALUES"); + + MIB->setDesc(TII.get(ARM::VMOVDRR)); + MIB.add(predOps(ARMCC::AL)); + + return true; +} + +static bool selectUnmergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP"); + + // We only support G_UNMERGE_VALUES as a way to break up one DPR into two + // GPRs. + unsigned VReg0 = MIB->getOperand(0).getReg(); + (void)VReg0; + assert(MRI.getType(VReg0).getSizeInBits() == 32 && + RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + unsigned VReg1 = MIB->getOperand(1).getReg(); + (void)VReg1; + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 64 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + + MIB->setDesc(TII.get(ARM::VMOVRRD)); + MIB.add(predOps(ARMCC::AL)); + + return true; +} + +/// Select the opcode for simple extensions (that translate to a single SXT/UXT +/// instruction). Extension operations more complicated than that should not +/// invoke this. Returns the original opcode if it doesn't know how to select a +/// better one. +static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) { + using namespace TargetOpcode; + + if (Size != 8 && Size != 16) + return Opc; + + if (Opc == G_SEXT) + return Size == 8 ? ARM::SXTB : ARM::SXTH; + + if (Opc == G_ZEXT) + return Size == 8 ? ARM::UXTB : ARM::UXTH; + + return Opc; +} + +/// Select the opcode for simple loads and stores. For types smaller than 32 +/// bits, the value will be zero extended. Returns the original opcode if it +/// doesn't know how to select a better one. +static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, + unsigned Size) { + bool isStore = Opc == TargetOpcode::G_STORE; + + if (RegBank == ARM::GPRRegBankID) { + switch (Size) { + case 1: + case 8: + return isStore ? ARM::STRBi12 : ARM::LDRBi12; + case 16: + return isStore ? ARM::STRH : ARM::LDRH; + case 32: + return isStore ? 
ARM::STRi12 : ARM::LDRi12; + default: + return Opc; + } + } + + if (RegBank == ARM::FPRRegBankID) { + switch (Size) { + case 32: + return isStore ? ARM::VSTRS : ARM::VLDRS; + case 64: + return isStore ? ARM::VSTRD : ARM::VLDRD; + default: + return Opc; + } + } + + return Opc; +} + +// When lowering comparisons, we sometimes need to perform two compares instead +// of just one. Get the condition codes for both comparisons. If only one is +// needed, the second member of the pair is ARMCC::AL. +static std::pair<ARMCC::CondCodes, ARMCC::CondCodes> +getComparePreds(CmpInst::Predicate Pred) { + std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL}; + switch (Pred) { + case CmpInst::FCMP_ONE: + Preds = {ARMCC::GT, ARMCC::MI}; + break; + case CmpInst::FCMP_UEQ: + Preds = {ARMCC::EQ, ARMCC::VS}; + break; + case CmpInst::ICMP_EQ: + case CmpInst::FCMP_OEQ: + Preds.first = ARMCC::EQ; + break; + case CmpInst::ICMP_SGT: + case CmpInst::FCMP_OGT: + Preds.first = ARMCC::GT; + break; + case CmpInst::ICMP_SGE: + case CmpInst::FCMP_OGE: + Preds.first = ARMCC::GE; + break; + case CmpInst::ICMP_UGT: + case CmpInst::FCMP_UGT: + Preds.first = ARMCC::HI; + break; + case CmpInst::FCMP_OLT: + Preds.first = ARMCC::MI; + break; + case CmpInst::ICMP_ULE: + case CmpInst::FCMP_OLE: + Preds.first = ARMCC::LS; + break; + case CmpInst::FCMP_ORD: + Preds.first = ARMCC::VC; + break; + case CmpInst::FCMP_UNO: + Preds.first = ARMCC::VS; + break; + case CmpInst::FCMP_UGE: + Preds.first = ARMCC::PL; + break; + case CmpInst::ICMP_SLT: + case CmpInst::FCMP_ULT: + Preds.first = ARMCC::LT; + break; + case CmpInst::ICMP_SLE: + case CmpInst::FCMP_ULE: + Preds.first = ARMCC::LE; + break; + case CmpInst::FCMP_UNE: + case CmpInst::ICMP_NE: + Preds.first = ARMCC::NE; + break; + case CmpInst::ICMP_UGE: + Preds.first = ARMCC::HS; + break; + case CmpInst::ICMP_ULT: + Preds.first = ARMCC::LO; + break; + default: + break; + } + assert(Preds.first != ARMCC::AL && "No comparisons needed?"); + return Preds; +} + +struct ARMInstructionSelector::CmpConstants { + CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank, + unsigned OpSize) + : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode), + OperandRegBankID(OpRegBank), OperandSize(OpSize) {} + + // The opcode used for performing the comparison. + const unsigned ComparisonOpcode; + + // The opcode used for reading the flags set by the comparison. May be + // ARM::INSTRUCTION_LIST_END if we don't need to read the flags. + const unsigned ReadFlagsOpcode; + + // The assumed register bank ID for the operands. + const unsigned OperandRegBankID; + + // The assumed size in bits for the operands. 
+ const unsigned OperandSize; +}; + +struct ARMInstructionSelector::InsertInfo { + InsertInfo(MachineInstrBuilder &MIB) + : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())), + DbgLoc(MIB->getDebugLoc()) {} + + MachineBasicBlock &MBB; + const MachineBasicBlock::instr_iterator InsertBefore; + const DebugLoc &DbgLoc; +}; + +void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg, + unsigned Constant) const { + (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi)) + .addDef(DestReg) + .addImm(Constant) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); +} + +bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI, + unsigned LHSReg, unsigned RHSReg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + return MRI.getType(LHSReg) == MRI.getType(RHSReg) && + validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) && + validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID); +} + +bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) { + DEBUG(dbgs() << "Unexpected size for register"); + return false; + } + + if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) { + DEBUG(dbgs() << "Unexpected register bank for register"); + return false; + } + + return true; +} + +bool ARMInstructionSelector::selectCmp(CmpConstants Helper, + MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + const InsertInfo I(MIB); + + auto ResReg = MIB->getOperand(0).getReg(); + if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID)) + return false; + + auto Cond = + static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate()); + if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) { + putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0); + MIB->eraseFromParent(); + return true; + } + + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize, + Helper.OperandRegBankID)) + return false; + + auto ARMConds = getComparePreds(Cond); + auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass); + putConstant(I, ZeroReg, 0); + + if (ARMConds.second == ARMCC::AL) { + // Simple case, we only need one comparison and we're done. + if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg, + ZeroReg)) + return false; + } else { + // Not so simple, we need two successive comparisons. + auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass); + if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg, + RHSReg, ZeroReg)) + return false; + if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg, + IntermediateRes)) + return false; + } + + MIB->eraseFromParent(); + return true; +} + +bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I, + unsigned ResReg, + ARMCC::CondCodes Cond, + unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const { + // Perform the comparison. + auto CmpI = + BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Read the comparison flags (if necessary). 
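  // (FMSTAT is "vmrs APSR_nzcv, fpscr": it copies the floating-point status
  // flags into CPSR so the predicated MOVCCi below can test them. Integer
  // compares set CPSR directly, which is why CmpConstants allows
  // ARM::INSTRUCTION_LIST_END here. For a two-predicate case such as
  // FCMP_ONE, selectCmp calls this function twice, producing roughly:
  //   MOVi  zero, 0
  //   VCMPS lhs, rhs; FMSTAT; MOVCCi tmp, zero, 1, GT
  //   VCMPS lhs, rhs; FMSTAT; MOVCCi res, tmp,  1, MI
  // i.e. res is 1 if either condition held, else 0.)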
+ if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) { + auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, + TII.get(Helper.ReadFlagsOpcode)) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI)) + return false; + } + + // Select either 1 or the previous result based on the value of the flags. + auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi)) + .addDef(ResReg) + .addUse(PrevRes) + .addImm(1) + .add(predOps(Cond, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + return true; +} + +bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + auto &MBB = *MIB->getParent(); + auto InsertBefore = std::next(MIB->getIterator()); + auto &DbgLoc = MIB->getDebugLoc(); + + // Compare the condition to 0. + auto CondReg = MIB->getOperand(1).getReg(); + assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) && + "Unsupported types for select operation"); + auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri)) + .addUse(CondReg) + .addImm(0) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Move a value into the result register based on the result of the + // comparison. + auto ResReg = MIB->getOperand(0).getReg(); + auto TrueReg = MIB->getOperand(2).getReg(); + auto FalseReg = MIB->getOperand(3).getReg(); + assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) && + validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) && + "Unsupported types for select operation"); + auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr)) + .addDef(ResReg) + .addUse(TrueReg) + .addUse(FalseReg) + .add(predOps(ARMCC::EQ, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -83,24 +540,211 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { return true; } + if (selectImpl(I)) + return true; + MachineInstrBuilder MIB{MF, I}; + bool isSExt = false; using namespace TargetOpcode; switch (I.getOpcode()) { - case G_ADD: + case G_SEXT: + isSExt = true; + LLVM_FALLTHROUGH; + case G_ZEXT: { + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + // FIXME: Smaller destination sizes coming soon! 
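    // (For an s1 source the code below computes zext as (x & 1), and sext
    // as 0 - (x & 1) using RSBri, i.e. 0 or all-ones. For s8/s16 sources a
    // single SXTB/SXTH/UXTB/UXTH suffices; the MIB.addImm(0) in that path
    // supplies the instruction's rotate-amount operand.)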
+ if (DstTy.getSizeInBits() != 32) { + DEBUG(dbgs() << "Unsupported destination size for extension"); + return false; + } + + LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); + unsigned SrcSize = SrcTy.getSizeInBits(); + switch (SrcSize) { + case 1: { + // ZExt boils down to & 0x1; for SExt we also subtract that from 0 + I.setDesc(TII.get(ARM::ANDri)); + MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp()); + + if (isSExt) { + unsigned SExtResult = I.getOperand(0).getReg(); + + // Use a new virtual register for the result of the AND + unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); + I.getOperand(0).setReg(AndResult); + + auto InsertBefore = std::next(I.getIterator()); + auto SubI = + BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri)) + .addDef(SExtResult) + .addUse(AndResult) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI)) + return false; + } + break; + } + case 8: + case 16: { + unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize); + if (NewOpc == I.getOpcode()) + return false; + I.setDesc(TII.get(NewOpc)); + MIB.addImm(0).add(predOps(ARMCC::AL)); + break; + } + default: + DEBUG(dbgs() << "Unsupported source size for extension"); + return false; + } + break; + } + case G_ANYEXT: + case G_TRUNC: { + // The high bits are undefined, so there's nothing special to do, just + // treat it as a copy. + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } + case G_SELECT: + return selectSelect(MIB, MRI); + case G_ICMP: { + CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, + ARM::GPRRegBankID, 32); + return selectCmp(Helper, MIB, MRI); + } + case G_FCMP: { + assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); + + unsigned OpReg = I.getOperand(2).getReg(); + unsigned Size = MRI.getType(OpReg).getSizeInBits(); + + if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { + DEBUG(dbgs() << "Subtarget only supports single precision"); + return false; + } + if (Size != 32 && Size != 64) { + DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); + return false; + } + + CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, + ARM::FPRRegBankID, Size); + return selectCmp(Helper, MIB, MRI); + } + case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); - AddDefaultCC(AddDefaultPred(MIB)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; case G_FRAME_INDEX: // Add 0 to the given frame index and hope it will eventually be folded into // the user(s). 
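    // (The frame-index operand of the ADDri below is rewritten to a concrete
    // SP- or FP-relative offset during frame index elimination; the add with
    // zero is then expected to fold into its users' addressing modes.)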
I.setDesc(TII.get(ARM::ADDri)); - AddDefaultCC(AddDefaultPred(MIB.addImm(0))); + MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp()); + break; + case G_CONSTANT: { + unsigned Reg = I.getOperand(0).getReg(); + + if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) + return false; + + I.setDesc(TII.get(ARM::MOVi)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + + auto &Val = I.getOperand(1); + if (Val.isCImm()) { + if (Val.getCImm()->getBitWidth() > 32) + return false; + Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); + } + + if (!Val.isImm()) { + return false; + } + + break; + } + case G_STORE: + case G_LOAD: { + const auto &MemOp = **I.memoperands_begin(); + if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { + DEBUG(dbgs() << "Atomic load/store not supported yet\n"); + return false; + } + + unsigned Reg = I.getOperand(0).getReg(); + unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID(); + + LLT ValTy = MRI.getType(Reg); + const auto ValSize = ValTy.getSizeInBits(); + + assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) && + "Don't know how to load/store 64-bit value without VFP"); + + const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize); + if (NewOpc == G_LOAD || NewOpc == G_STORE) + return false; + + I.setDesc(TII.get(NewOpc)); + + if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH) + // LDRH has a funny addressing mode (there's already a FIXME for it). + MIB.addReg(0); + MIB.addImm(0).add(predOps(ARMCC::AL)); break; - case G_LOAD: - I.setDesc(TII.get(ARM::LDRi12)); - AddDefaultPred(MIB.addImm(0)); + } + case G_MERGE_VALUES: { + if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) + return false; + break; + } + case G_UNMERGE_VALUES: { + if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) + return false; break; + } + case G_BRCOND: { + if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) { + DEBUG(dbgs() << "Unsupported condition register for G_BRCOND"); + return false; + } + + // Set the flags. + auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri)) + .addReg(I.getOperand(0).getReg()) + .addImm(1) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI)) + return false; + + // Branch conditionally. + auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) + .add(I.getOperand(1)) + .add(predOps(ARMCC::EQ, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) + return false; + I.eraseFromParent(); + return true; + } default: return false; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h deleted file mode 100644 index 5072cdd..0000000 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h +++ /dev/null @@ -1,39 +0,0 @@ -//===- ARMInstructionSelector ------------------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the InstructionSelector class for ARM. 
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H -#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H - -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" - -namespace llvm { -class ARMBaseInstrInfo; -class ARMBaseRegisterInfo; -class ARMBaseTargetMachine; -class ARMRegisterBankInfo; -class ARMSubtarget; - -class ARMInstructionSelector : public InstructionSelector { -public: - ARMInstructionSelector(const ARMSubtarget &STI, - const ARMRegisterBankInfo &RBI); - - virtual bool select(MachineInstr &I) const override; - -private: - const ARMBaseInstrInfo &TII; - const ARMBaseRegisterInfo &TRI; - const ARMRegisterBankInfo &RBI; -}; - -} // End llvm namespace. -#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 255ea4b..1c17c07 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -12,6 +12,11 @@ //===----------------------------------------------------------------------===// #include "ARMLegalizerInfo.h" +#include "ARMCallLowering.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" @@ -23,22 +28,328 @@ using namespace llvm; #error "You shouldn't build this" #endif -ARMLegalizerInfo::ARMLegalizerInfo() { +static bool AEABI(const ARMSubtarget &ST) { + return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); +} + +ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; const LLT p0 = LLT::pointer(0, 32); + const LLT s1 = LLT::scalar(1); const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); setAction({G_FRAME_INDEX, p0}, Legal); - setAction({G_LOAD, s32}, Legal); - setAction({G_LOAD, 1, p0}, Legal); + for (unsigned Op : {G_LOAD, G_STORE}) { + for (auto Ty : {s1, s8, s16, s32, p0}) + setAction({Op, Ty}, Legal); + setAction({Op, 1, p0}, Legal); + } + + for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { + for (auto Ty : {s1, s8, s16}) + setAction({Op, Ty}, WidenScalar); + setAction({Op, s32}, Legal); + } + + for (unsigned Op : {G_SDIV, G_UDIV}) { + for (auto Ty : {s8, s16}) + setAction({Op, Ty}, WidenScalar); + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Legal); + else + setAction({Op, s32}, Libcall); + } + + for (unsigned Op : {G_SREM, G_UREM}) { + for (auto Ty : {s8, s16}) + setAction({Op, Ty}, WidenScalar); + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Lower); + else if (AEABI(ST)) + setAction({Op, s32}, Custom); + else + setAction({Op, s32}, Libcall); + } + + for (unsigned Op : {G_SEXT, G_ZEXT}) { + setAction({Op, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({Op, 1, Ty}, Legal); + } + + setAction({G_GEP, p0}, Legal); + setAction({G_GEP, 1, s32}, Legal); + + setAction({G_SELECT, s32}, Legal); + setAction({G_SELECT, p0}, Legal); + setAction({G_SELECT, 1, s1}, Legal); + + setAction({G_BRCOND, s1}, Legal); + + setAction({G_CONSTANT, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({G_CONSTANT, Ty}, WidenScalar); + + setAction({G_ICMP, s1}, Legal); + for (auto Ty : {s8, s16}) + setAction({G_ICMP, 1, Ty}, WidenScalar); + for (auto Ty : {s32, p0}) + setAction({G_ICMP, 1, Ty}, 
Legal); - for (auto Ty : {s8, s16, s32}) - setAction({G_ADD, Ty}, Legal); + if (!ST.useSoftFloat() && ST.hasVFP2()) { + setAction({G_FADD, s32}, Legal); + setAction({G_FADD, s64}, Legal); + + setAction({G_LOAD, s64}, Legal); + setAction({G_STORE, s64}, Legal); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Legal); + setAction({G_FCMP, 1, s64}, Legal); + } else { + for (auto Ty : {s32, s64}) + setAction({G_FADD, Ty}, Libcall); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Custom); + setAction({G_FCMP, 1, s64}, Custom); + + if (AEABI(ST)) + setFCmpLibcallsAEABI(); + else + setFCmpLibcallsGNU(); + } + + for (unsigned Op : {G_FREM, G_FPOW}) + for (auto Ty : {s32, s64}) + setAction({Op, Ty}, Libcall); computeTables(); } + +void ARMLegalizerInfo::setFCmpLibcallsAEABI() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. + FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F64, 
CmpInst::BAD_ICMP_PREDICATE}}; +} + +void ARMLegalizerInfo::setFCmpLibcallsGNU() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. + FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, + {RTLIB::UO_F32, CmpInst::ICMP_NE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}, + {RTLIB::UO_F64, CmpInst::ICMP_NE}}; +} + +ARMLegalizerInfo::FCmpLibcallsList +ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, + unsigned Size) const { + assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate"); + if (Size == 32) + return FCmp32Libcalls[Predicate]; + if (Size == 64) + return FCmp64Libcalls[Predicate]; + llvm_unreachable("Unsupported size for FCmp predicate"); +} + +bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + using namespace TargetOpcode; + + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return false; + case G_SREM: + case G_UREM: { + unsigned OriginalResult = MI.getOperand(0).getReg(); + auto Size = MRI.getType(OriginalResult).getSizeInBits(); + if (Size != 32) + return false; + + auto Libcall = + MI.getOpcode() == G_SREM ? 
RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; + + // Our divmod libcalls return a struct containing the quotient and the + // remainder. We need to create a virtual register for it. + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + Type *ArgTy = Type::getInt32Ty(Ctx); + StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); + auto RetVal = MRI.createGenericVirtualRegister( + getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); + + auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy}, + {{MI.getOperand(1).getReg(), ArgTy}, + {MI.getOperand(2).getReg(), ArgTy}}); + if (Status != LegalizerHelper::Legalized) + return false; + + // The remainder is the second result of divmod. Split the return value into + // a new, unused register for the quotient and the destination of the + // original instruction for the remainder. + MIRBuilder.buildUnmerge( + {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, + RetVal); + break; + } + case G_FCMP: { + assert(MRI.getType(MI.getOperand(2).getReg()) == + MRI.getType(MI.getOperand(3).getReg()) && + "Mismatched operands for G_FCMP"); + auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + + auto OriginalResult = MI.getOperand(0).getReg(); + auto Predicate = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + auto Libcalls = getFCmpLibcalls(Predicate, OpSize); + + if (Libcalls.empty()) { + assert((Predicate == CmpInst::FCMP_TRUE || + Predicate == CmpInst::FCMP_FALSE) && + "Predicate needs libcalls, but none specified"); + MIRBuilder.buildConstant(OriginalResult, + Predicate == CmpInst::FCMP_TRUE ? 1 : 0); + MI.eraseFromParent(); + return true; + } + + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); + auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + auto *RetTy = Type::getInt32Ty(Ctx); + + SmallVector<unsigned, 2> Results; + for (auto Libcall : Libcalls) { + auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32)); + auto Status = + createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy}, + {{MI.getOperand(2).getReg(), ArgTy}, + {MI.getOperand(3).getReg(), ArgTy}}); + + if (Status != LegalizerHelper::Legalized) + return false; + + auto ProcessedResult = + Libcalls.size() == 1 + ? OriginalResult + : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult)); + + // We have a result, but we need to transform it into a proper 1-bit 0 or + // 1, taking into account the different peculiarities of the values + // returned by the comparison functions. + CmpInst::Predicate ResultPred = Libcall.Predicate; + if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) { + // We have a nice 0 or 1, and we just need to truncate it back to 1 bit + // to keep the types consistent. + MIRBuilder.buildTrunc(ProcessedResult, LibcallResult); + } else { + // We need to compare against 0. 
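// (Editorial note, hedged: the rationale below reflects the usual libgcc/RTABI
// conventions, not text from this patch.) The GNU soft-float helpers
// (__gtsf2, __ltsf2, ...) return a three-way int rather than a bool, so the
// GNU table pairs each libcall with the ICmp predicate that recovers the
// boolean, e.g. FCMP_OGT on s32 becomes
//   icmp sgt (call __gtsf2(a, b)), 0
// The AEABI __aeabi_fcmp* helpers return 0 or 1 directly, which is why the
// AEABI table mostly carries BAD_ICMP_PREDICATE and a plain truncate is
// enough there.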
+ assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate"); + auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32)); + MIRBuilder.buildConstant(Zero, 0); + MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero); + } + Results.push_back(ProcessedResult); + } + + if (Results.size() != 1) { + assert(Results.size() == 2 && "Unexpected number of results"); + MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]); + } + break; + } + } + + MI.eraseFromParent(); + return true; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h index ca3eea8..78ab941 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -14,16 +14,52 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H #define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H +#include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/IR/Instructions.h" namespace llvm { -class LLVMContext; +class ARMSubtarget; /// This class provides the information for the target register banks. class ARMLegalizerInfo : public LegalizerInfo { public: - ARMLegalizerInfo(); + ARMLegalizerInfo(const ARMSubtarget &ST); + + bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; + +private: + void setFCmpLibcallsGNU(); + void setFCmpLibcallsAEABI(); + + struct FCmpLibcallInfo { + // Which libcall this is. + RTLIB::Libcall LibcallID; + + // The predicate to be used when comparing the value returned by the + // function with a relevant constant (currently hard-coded to zero). This is + // necessary because often the libcall will return e.g. a value greater than + // 0 to represent 'true' and anything negative to represent 'false', or + // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is + // needed, this should be CmpInst::BAD_ICMP_PREDICATE. + CmpInst::Predicate Predicate; + }; + using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>; + + // Map from each FCmp predicate to the corresponding libcall infos. A FCmp + // instruction may be lowered to one or two libcalls, which is why we need a + // list. If two libcalls are needed, their results will be OR'ed. + using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>; + + FCmpLibcallsMapTy FCmp32Libcalls; + FCmpLibcallsMapTy FCmp64Libcalls; + + // Get the libcall(s) corresponding to \p Predicate for operands of \p Size + // bits. + FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const; }; } // End llvm namespace. 
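To make the two-libcall case above concrete, here is a hedged sketch of what FCMP_ONE (ordered and not equal) on s32 amounts to under the GNU mapping. The helper signatures follow the usual libgcc convention and are an assumption of this note, not part of the patch:

    extern "C" int __gtsf2(float, float); // > 0 iff neither is NaN and a > b
    extern "C" int __ltsf2(float, float); // < 0 iff neither is NaN and a < b

    bool one_f32(float a, float b) {
      bool gt = __gtsf2(a, b) > 0; // RTLIB::OGT_F32, checked with ICMP_SGT
      bool lt = __ltsf2(a, b) < 0; // RTLIB::OLT_F32, checked with ICMP_SLT
      return gt | lt;              // two results, so legalizeCustom ORs them
    }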
#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 48ab491..7a452d4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -33,7 +34,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -517,8 +517,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, if (InsertSub) { // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) - .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base) + .add(t1CondCodeOp(true)) + .addReg(Base) + .addImm(WordOffset * 4) + .addImm(Pred) + .addReg(PredReg); return; } @@ -534,9 +538,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, // information and *always* have to reset at the end of a block. // See PR21029. if (MBBI != MBB.end()) --MBBI; - AddDefaultT1CC( - BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) - .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base) + .add(t1CondCodeOp(true)) + .addReg(Base) + .addImm(WordOffset * 4) + .addImm(Pred) + .addReg(PredReg); } } @@ -602,13 +609,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( // Exception: If the base register is in the input reglist, Thumb1 LDM is // non-writeback. // It's also not possible to merge an STR of the base register in Thumb1. - if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { + if (isThumb1 && ContainsReg(Regs, Base)) { assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); - if (Opcode == ARM::tLDRi) { + if (Opcode == ARM::tLDRi) Writeback = false; - } else if (Opcode == ARM::tSTRi) { + else if (Opcode == ARM::tSTRi) return nullptr; - } } ARM_AM::AMSubMode Mode = ARM_AM::ia; @@ -700,8 +706,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( .addReg(Base, getKillRegState(KillOldBase)); } else BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(KillOldBase)) - .addImm(Pred).addReg(PredReg); + .addReg(Base, getKillRegState(KillOldBase)) + .add(predOps(Pred, PredReg)); // The following ADDS/SUBS becomes an update. 
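// (Editorial aside on the builder helpers this refactoring switches to; a
// hedged sketch of their shape, assuming the ARMBaseInstrInfo.h definitions
// that this patch series uses elsewhere.)
//   predOps(Pred, PredReg = 0) yields the two trailing predicate operands, so
//     MIB.add(predOps(ARMCC::AL))  // same as .addImm(ARMCC::AL).addReg(0)
//   replaces AddDefaultPred(MIB); t1CondCodeOp(/*isDead=*/true) is the
//   leading Thumb1 CPSR def that AddDefaultT1CC used to prepend; and
//   condCodeOp() is the optional cc_out operand (a zero register when the
//   instruction does not set flags), replacing AddDefaultCC.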
Base = NewBase; @@ -710,17 +716,21 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( if (BaseOpc == ARM::tADDrSPi) { assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) - .addImm(Pred).addReg(PredReg); + .addReg(Base, getKillRegState(KillOldBase)) + .addImm(Offset / 4) + .add(predOps(Pred, PredReg)); } else - AddDefaultT1CC( - BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) - .addImm(Pred).addReg(PredReg); + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .add(t1CondCodeOp(true)) + .addReg(Base, getKillRegState(KillOldBase)) + .addImm(Offset) + .add(predOps(Pred, PredReg)); } else { BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + .addReg(Base, getKillRegState(KillOldBase)) + .addImm(Offset) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()); } Base = NewBase; BaseKill = true; // New base is always killed straight away. @@ -1259,7 +1269,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Transfer the rest of operands. for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) - MIB.addOperand(MI->getOperand(OpNum)); + MIB.add(MI->getOperand(OpNum)); // Transfer memoperands. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -1392,14 +1402,19 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { } else { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); + .addReg(Base, RegState::Define) + .addReg(Base) + .addReg(0) + .addImm(Imm) + .add(predOps(Pred, PredReg)); } } else { // t2LDR_PRE, t2LDR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base, RegState::Define) + .addReg(Base) + .addImm(Offset) + .add(predOps(Pred, PredReg)); } } else { MachineOperand &MO = MI->getOperand(0); @@ -1410,13 +1425,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base) + .addReg(0) + .addImm(Imm) + .add(predOps(Pred, PredReg)); } else { // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base) + .addImm(Offset) + .add(predOps(Pred, PredReg)); } } MBB.erase(MBBI); @@ -1462,12 +1482,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { DebugLoc DL = MI.getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { - MIB.addOperand(Reg0Op).addOperand(Reg1Op) - .addReg(BaseOp.getReg(), RegState::Define); + MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), 
RegState::Define); } else { assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); - MIB.addReg(BaseOp.getReg(), RegState::Define) - .addOperand(Reg0Op).addOperand(Reg1Op); + MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op); } MIB.addReg(BaseOp.getReg(), RegState::Kill) .addImm(Offset).addImm(Pred).addReg(PredReg); @@ -1477,7 +1495,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { // Transfer implicit operands. for (const MachineOperand &MO : MI.implicit_operands()) - MIB.addOperand(MO); + MIB.add(MO); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MBB.erase(MBBI); @@ -1891,8 +1909,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { for (auto Use : Prev->uses()) if (Use.isKill()) { - AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) - .addReg(Use.getReg(), RegState::Kill)) + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) + .addReg(Use.getReg(), RegState::Kill) + .add(predOps(ARMCC::AL)) .copyImplicitOps(*MBBI); MBB.erase(MBBI); MBB.erase(Prev); @@ -1942,6 +1961,7 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} + AliasAnalysis *AA; const DataLayout *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -1955,6 +1975,11 @@ namespace { return ARM_PREALLOC_LOAD_STORE_OPT_NAME; } + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, unsigned &EvenReg, @@ -1984,6 +2009,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); MF = &Fn; + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); bool Modified = false; for (MachineBasicBlock &MFI : Fn) @@ -1997,28 +2023,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator E, SmallPtrSetImpl<MachineInstr*> &MemOps, SmallSet<unsigned, 4> &MemRegs, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + AliasAnalysis *AA) { // Are there stores / loads / calls between them? - // FIXME: This is overly conservative. We should make use of alias information - // some day. SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && I->mayStore()) - return false; - if (!isLd) { - if (I->mayLoad()) - return false; - // It's not safe to move the first 'str' down. - // str r1, [r0] - // strh r5, [r0] - // str r4, [r0, #+4] - if (I->mayStore()) - return false; - } + if (I->mayStore() || (!isLd && I->mayLoad())) + for (MachineInstr *MemOp : MemOps) + if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false)) + return false; for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { MachineOperand &MO = I->getOperand(j); if (!MO.isReg()) @@ -2142,33 +2159,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, unsigned LastBytes = 0; unsigned NumMove = 0; for (int i = Ops.size() - 1; i >= 0; --i) { + // Make sure each operation has the same kind. 
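// (Editorial aside, hedged: a reading of the AliasAnalysis change in
// IsSafeAndProfitableToMove above, not new code.) Where the old check gave up
// on any intervening store, the pass now only bails out when AA reports that
// the intervening access may alias one of the loads/stores being merged:
//   if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
//     return false;
// so, for example, a store to an unrelated spill slot sitting between two
// mergeable loads no longer blocks the transformation.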
MachineInstr *Op = Ops[i]; - unsigned Loc = MI2LocMap[Op]; - if (Loc <= FirstLoc) { - FirstLoc = Loc; - FirstOp = Op; - } - if (Loc >= LastLoc) { - LastLoc = Loc; - LastOp = Op; - } - unsigned LSMOpcode = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); if (LastOpcode && LSMOpcode != LastOpcode) break; + // Check that we have a continuous set of offsets. int Offset = getMemoryOpOffset(*Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) break; } + + // Don't try to reschedule too many instructions. + if (NumMove == 8) // FIXME: Tune this limit. + break; + + // Found a mergable instruction; save information about it. + ++NumMove; LastOffset = Offset; LastBytes = Bytes; LastOpcode = LSMOpcode; - if (++NumMove == 8) // FIXME: Tune this limit. - break; + + unsigned Loc = MI2LocMap[Op]; + if (Loc <= FirstLoc) { + FirstLoc = Loc; + FirstOp = Op; + } + if (Loc >= LastLoc) { + LastLoc = Loc; + LastOp = Op; + } } if (NumMove <= 1) @@ -2176,7 +2200,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, else { SmallPtrSet<MachineInstr*, 4> MemOps; SmallSet<unsigned, 4> MemRegs; - for (int i = NumMove-1; i >= 0; --i) { + for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) { MemOps.insert(Ops[i]); MemRegs.insert(Ops[i]->getOperand(0).getReg()); } @@ -2186,7 +2210,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. if (DoMove) DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, - MemOps, MemRegs, TRI); + MemOps, MemRegs, TRI, AA); if (!DoMove) { for (unsigned i = 0; i != NumMove; ++i) Ops.pop_back(); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 07044b9..48b02d4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -14,23 +14,36 @@ #include "ARM.h" #include "ARMAsmPrinter.h" +#include "ARMBaseInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" +#include "llvm/ADT/APFloat.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" -using namespace llvm; +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> +using namespace llvm; MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol) { + MCSymbolRefExpr::VariantKind SymbolVariant = MCSymbolRefExpr::VK_None; + if (MO.getTargetFlags() & ARMII::MO_SBREL) + SymbolVariant = MCSymbolRefExpr::VK_ARM_SBREL; + const MCExpr *Expr = - MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) { default: llvm_unreachable("Unknown target flag on symbol operand"); @@ -38,12 +51,12 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, break; case ARMII::MO_LO16: Expr = - MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + 
MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); Expr = ARMMCExpr::createLower16(Expr, OutContext); break; case ARMII::MO_HI16: Expr = - MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); Expr = ARMMCExpr::createUpper16(Expr, OutContext); break; } @@ -75,11 +88,10 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; - case MachineOperand::MO_GlobalAddress: { + case MachineOperand::MO_GlobalAddress: MCOp = GetSymbolRef(MO, GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags())); break; - } case MachineOperand::MO_ExternalSymbol: MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); @@ -141,9 +153,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, break; } - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; if (AP.lowerOperand(MO, MCOp)) { if (MCOp.isImm() && EncodeImms) { @@ -199,11 +209,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) .addImm(ARMCC::AL).addReg(0)); MCInst Noop; - Subtarget->getInstrInfo()->getNoopForElfTarget(Noop); + Subtarget->getInstrInfo()->getNoop(Noop); for (int8_t I = 0; I < NoopsInSledCount; I++) - { OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); - } OutStreamer->EmitLabel(Target); recordSled(CurSled, MI, Kind); diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 50d8f09..e25d36b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" using namespace llvm; @@ -15,10 +16,4 @@ void ARMFunctionInfo::anchor() {} ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()), - hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()), - StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0), - HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0), - IsSplitCSR(false), PromotedGlobalsIncrease(0) {} + hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 8c485e8..8161167 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -14,11 +14,11 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H -#include "ARMSubtarget.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include <utility> namespace llvm { @@ -29,42 +29,42 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// isThumb - True if this function is compiled under Thumb mode. 
/// Used to initialize Align, so must precede it. - bool isThumb; + bool isThumb = false; /// hasThumb2 - True if the target architecture supports Thumb2. Do not use /// to determine if function is compiled under Thumb mode, for that use /// 'isThumb'. - bool hasThumb2; + bool hasThumb2 = false; /// StByValParamsPadding - For parameter that is split between /// GPRs and memory; while recovering GPRs part, when /// StackAlignment > 4, and GPRs-part-size mod StackAlignment != 0, /// we need to insert gap before parameter start address. It allows to /// "attach" GPR-part to the part that was passed via stack. - unsigned StByValParamsPadding; + unsigned StByValParamsPadding = 0; /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// - unsigned ArgRegsSaveSize; + unsigned ArgRegsSaveSize = 0; /// ReturnRegsCount - Number of registers used up in the return. - unsigned ReturnRegsCount; + unsigned ReturnRegsCount = 0; /// HasStackFrame - True if this function has a stack frame. Set by /// determineCalleeSaves(). - bool HasStackFrame; + bool HasStackFrame = false; /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by /// emitPrologue. - bool RestoreSPFromFP; + bool RestoreSPFromFP = false; /// LRSpilledForFarJump - True if the LR register has been spilled to /// enable a far jump. - bool LRSpilledForFarJump; + bool LRSpilledForFarJump = false; /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer /// spill stack offset. - unsigned FramePtrSpillOffset; + unsigned FramePtrSpillOffset = 0; /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved /// register spills areas. For Mac OS X: @@ -77,16 +77,16 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3. /// Some may be spilled after the stack has been realigned. - unsigned GPRCS1Offset; - unsigned GPRCS2Offset; - unsigned DPRCSOffset; + unsigned GPRCS1Offset = 0; + unsigned GPRCS2Offset = 0; + unsigned DPRCSOffset = 0; /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. - unsigned GPRCS1Size; - unsigned GPRCS2Size; - unsigned DPRCSAlignGapSize; - unsigned DPRCSSize; + unsigned GPRCS1Size = 0; + unsigned GPRCS2Size = 0; + unsigned DPRCSAlignGapSize = 0; + unsigned DPRCSSize = 0; /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in /// the aligned portion of the stack frame. This is always a contiguous @@ -95,15 +95,15 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// We do not keep track of the frame indices used for these registers - they /// behave like any other frame index in the aligned stack frame. These /// registers also aren't included in DPRCSSize above. - unsigned NumAlignedDPRCS2Regs; + unsigned NumAlignedDPRCS2Regs = 0; - unsigned PICLabelUId; + unsigned PICLabelUId = 0; /// VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; + int VarArgsFrameIndex = 0; /// HasITBlocks - True if IT blocks have been inserted. - bool HasITBlocks; + bool HasITBlocks = false; /// CPEClones - Track constant pool entry clones created by the Constant Island /// pass.
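The pattern in these ARMMachineFunctionInfo.h hunks is mechanical but worth naming: each field moves from a hand-written constructor initializer list to a C++11 in-class initializer, which lets the default constructor later collapse to "= default". A minimal sketch of the idiom, using a hypothetical Info class rather than the real one:

    // Before: defaults live far from the declarations, and every constructor
    // must repeat the whole list.
    struct InfoOld {
      bool HasStackFrame;
      unsigned ArgRegsSaveSize;
      InfoOld() : HasStackFrame(false), ArgRegsSaveSize(0) {}
    };

    // After: the default sits next to the field, so no constructor can
    // forget one, and the default constructor becomes trivial.
    struct InfoNew {
      bool HasStackFrame = false;
      unsigned ArgRegsSaveSize = 0;
      InfoNew() = default;
    };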
@@ -111,7 +111,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// ArgumentStackSize - amount of bytes on stack consumed by the arguments /// being passed on the stack - unsigned ArgumentStackSize; + unsigned ArgumentStackSize = 0; /// CoalescedWeights - mapping of basic blocks to the rolling counter of /// coalesced weights. @@ -119,26 +119,16 @@ /// True if this function has a subset of CSRs that is handled explicitly via /// copies. - bool IsSplitCSR; + bool IsSplitCSR = false; /// Globals that have had their storage promoted into the constant pool. SmallPtrSet<const GlobalVariable*,2> PromotedGlobals; /// The amount the literal pool has been increased by due to promoted globals. - int PromotedGlobalsIncrease; + int PromotedGlobalsIncrease = 0; public: - ARMFunctionInfo() : - isThumb(false), - hasThumb2(false), - ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false), - RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), - NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false), - PromotedGlobalsIncrease(0) {} + ARMFunctionInfo() = default; explicit ARMFunctionInfo(MachineFunction &MF); @@ -250,6 +240,7 @@ public: PromotedGlobalsIncrease = Sz; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp new file mode 100644 index 0000000..1b6e97c --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp @@ -0,0 +1,57 @@ +//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the ARM implementation of the DAG scheduling +/// mutation to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "ARMMacroFusion.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI); + + // Assume wildcards for unspecified instrs. + unsigned FirstOpcode = + FirstMI ? FirstMI->getOpcode() + : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); + + if (ST.hasFuseAES()) + // Fuse AES crypto operations. + switch(SecondOpcode) { + // AES encode. + case ARM::AESMC : + return FirstOpcode == ARM::AESE || + FirstOpcode == ARM::INSTRUCTION_LIST_END; + // AES decode.
+ case ARM::AESIMC: + return FirstOpcode == ARM::AESD || + FirstOpcode == ARM::INSTRUCTION_LIST_END; + } + + return false; +} + +std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () { + return createMacroFusionDAGMutation(shouldScheduleAdjacent); +} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h new file mode 100644 index 0000000..1e4fc66 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h @@ -0,0 +1,24 @@ +//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the ARM definition of the DAG scheduling mutation +/// to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// Note that you have to add: +/// DAG.addMutation(createARMMacroFusionDAGMutation()); +/// to ARMPassConfig::createMachineScheduler() to have an effect. +std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation(); + +} // llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 581d5fe..7e4d598 100644 --- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -88,13 +88,15 @@ bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) { } } } + bool Changed = false; // Remove the tagged DMB for (auto MI : ToRemove) { MI->eraseFromParent(); ++NumDMBsRemoved; + Changed = true; } - return NumDMBsRemoved > 0; + return Changed; } /// createARMOptimizeBarriersPass - Returns an instance of the remove double diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 324087d..8449302 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -13,11 +13,15 @@ #include "ARMRegisterBankInfo.h" #include "ARMInstrInfo.h" // For the register classes +#include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#define GET_TARGET_REGBANK_IMPL +#include "ARMGenRegisterBank.inc" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -29,44 +33,109 @@ using namespace llvm; // into an ARMGenRegisterBankInfo.def (similar to AArch64). namespace llvm { namespace ARM { -const uint32_t GPRCoverageData[] = { - // Classes 0-31 - (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) | - (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) | - (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) | - (1u << ARM::GPRnopc_and_hGPRRegClassID) | - (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) | - (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) | - (1u << ARM::hGPR_and_tcGPRRegClassID), - // Classes 32-63 - 0, - // Classes 64-96 - 0, - // FIXME: Some of the entries below this point can be safely removed once - // this is tablegenerated. 
It's only needed because of the hardcoded - // register class limit. - // Classes 97-128 - 0, - // Classes 129-160 - 0, - // Classes 161-192 - 0, - // Classes 193-224 - 0, +enum PartialMappingIdx { + PMI_GPR, + PMI_SPR, + PMI_DPR, + PMI_Min = PMI_GPR, +}; + +RegisterBankInfo::PartialMapping PartMappings[]{ + // GPR Partial Mapping + {0, 32, GPRRegBank}, + // SPR Partial Mapping + {0, 32, FPRRegBank}, + // DPR Partial Mapping + {0, 64, FPRRegBank}, }; -RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData); -RegisterBank *RegBanks[] = {&GPRRegBank}; +#ifndef NDEBUG +static bool checkPartMapping(const RegisterBankInfo::PartialMapping &PM, + unsigned Start, unsigned Length, + unsigned RegBankID) { + return PM.StartIdx == Start && PM.Length == Length && + PM.RegBank->getID() == RegBankID; +} + +static void checkPartialMappings() { + assert( + checkPartMapping(PartMappings[PMI_GPR - PMI_Min], 0, 32, GPRRegBankID) && + "Wrong mapping for GPR"); + assert( + checkPartMapping(PartMappings[PMI_SPR - PMI_Min], 0, 32, FPRRegBankID) && + "Wrong mapping for SPR"); + assert( + checkPartMapping(PartMappings[PMI_DPR - PMI_Min], 0, 64, FPRRegBankID) && + "Wrong mapping for DPR"); +} +#endif -RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank}; +enum ValueMappingIdx { + InvalidIdx = 0, + GPR3OpsIdx = 1, + SPR3OpsIdx = 4, + DPR3OpsIdx = 7, +}; RegisterBankInfo::ValueMapping ValueMappings[] = { - {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}}; + // invalid + {nullptr, 0}, + // 3 ops in GPRs + {&PartMappings[PMI_GPR - PMI_Min], 1}, + {&PartMappings[PMI_GPR - PMI_Min], 1}, + {&PartMappings[PMI_GPR - PMI_Min], 1}, + // 3 ops in SPRs + {&PartMappings[PMI_SPR - PMI_Min], 1}, + {&PartMappings[PMI_SPR - PMI_Min], 1}, + {&PartMappings[PMI_SPR - PMI_Min], 1}, + // 3 ops in DPRs + {&PartMappings[PMI_DPR - PMI_Min], 1}, + {&PartMappings[PMI_DPR - PMI_Min], 1}, + {&PartMappings[PMI_DPR - PMI_Min], 1}}; + +#ifndef NDEBUG +static bool checkValueMapping(const RegisterBankInfo::ValueMapping &VM, + RegisterBankInfo::PartialMapping *BreakDown) { + return VM.NumBreakDowns == 1 && VM.BreakDown == BreakDown; +} + +static void checkValueMappings() { + assert(checkValueMapping(ValueMappings[GPR3OpsIdx], + &PartMappings[PMI_GPR - PMI_Min]) && + "Wrong value mapping for 3 GPR ops instruction"); + assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 1], + &PartMappings[PMI_GPR - PMI_Min]) && + "Wrong value mapping for 3 GPR ops instruction"); + assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 2], + &PartMappings[PMI_GPR - PMI_Min]) && + "Wrong value mapping for 3 GPR ops instruction"); + + assert(checkValueMapping(ValueMappings[SPR3OpsIdx], + &PartMappings[PMI_SPR - PMI_Min]) && + "Wrong value mapping for 3 SPR ops instruction"); + assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 1], + &PartMappings[PMI_SPR - PMI_Min]) && + "Wrong value mapping for 3 SPR ops instruction"); + assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 2], + &PartMappings[PMI_SPR - PMI_Min]) && + "Wrong value mapping for 3 SPR ops instruction"); + + assert(checkValueMapping(ValueMappings[DPR3OpsIdx], + &PartMappings[PMI_DPR - PMI_Min]) && + "Wrong value mapping for 3 DPR ops instruction"); + assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 1], + &PartMappings[PMI_DPR - PMI_Min]) && + "Wrong value mapping for 3 DPR ops instruction"); + assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 2], + &PartMappings[PMI_DPR - PMI_Min]) && + "Wrong value mapping for 3 DPR ops instruction"); +} 
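A short note on how these tables are consumed (a hedged sketch; the tablegen'erated RegisterBankInfo does the real indexing): ValueMappings deliberately holds three identical entries per bank so that a three-operand instruction can hand out one contiguous triple, e.g. for a G_ADD whose operands all live in GPRs:

    // Hypothetical illustration, mirroring getInstrMapping below.
    const RegisterBankInfo::ValueMapping *Ops =
        &ARM::ValueMappings[ARM::GPR3OpsIdx];
    // Ops[0], Ops[1] and Ops[2] all break down to the single partial mapping
    // {0, 32, GPRRegBank}: def, lhs and rhs each occupy a full 32-bit GPR.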
+#endif } // end namespace arm } // end namespace llvm ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) - : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) { + : ARMGenRegisterBankInfo() { static bool AlreadyInit = false; // We have only one set of register banks, whatever the subtarget // is. Therefore, the initialization of the RegBanks table should be @@ -97,6 +166,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) && "Subclass not added?"); assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit"); + +#ifndef NDEBUG + ARM::checkPartialMappings(); + ARM::checkValueMappings(); +#endif } const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( @@ -105,8 +179,16 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( switch (RC.getID()) { case GPRRegClassID: + case GPRnopcRegClassID: + case GPRspRegClassID: case tGPR_and_tcGPRRegClassID: + case tGPRRegClassID: return getRegBank(ARM::GPRRegBankID); + case SPR_8RegClassID: + case SPRRegClassID: + case DPR_8RegClassID: + case DPRRegClassID: + return getRegBank(ARM::FPRRegBankID); default: llvm_unreachable("Unsupported register kind"); } @@ -114,37 +196,163 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( llvm_unreachable("Switch should handle all register classes"); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { auto Opc = MI.getOpcode(); // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc)) { - InstructionMapping Mapping = getInstrMappingImpl(MI); + const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; } using namespace TargetOpcode; + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumOperands = MI.getNumOperands(); - const ValueMapping *OperandsMapping = &ARM::ValueMappings[0]; + const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; switch (Opc) { case G_ADD: - case G_LOAD: + case G_SUB: + case G_MUL: + case G_AND: + case G_OR: + case G_XOR: + case G_SDIV: + case G_UDIV: + case G_SEXT: + case G_ZEXT: + case G_ANYEXT: + case G_TRUNC: + case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. - OperandsMapping = &ARM::ValueMappings[0]; + OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; + break; + case G_LOAD: + case G_STORE: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + OperandsMapping = + Ty.getSizeInBits() == 64 + ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}) + : &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; + } + case G_FADD: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) && + "Unsupported size for G_FADD"); + OperandsMapping = Ty.getSizeInBits() == 64 + ? 
&ARM::ValueMappings[ARM::DPR3OpsIdx] + : &ARM::ValueMappings[ARM::SPR3OpsIdx]; + break; + } + case G_CONSTANT: case G_FRAME_INDEX: - OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr}); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + break; + case G_SELECT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + (void)Ty; + LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); + assert(Ty2.getSizeInBits() == 1 && "Unsupported size for G_SELECT"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } + case G_ICMP: { + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + (void)Ty2; + assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } + case G_FCMP: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + (void)Ty; + LLT Ty1 = MRI.getType(MI.getOperand(2).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP"); + assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() && + "Mismatched operand sizes for G_FCMP"); + + unsigned Size = Ty1.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP"); + + auto FPRValueMapping = Size == 32 ? &ARM::ValueMappings[ARM::SPR3OpsIdx] + : &ARM::ValueMappings[ARM::DPR3OpsIdx]; + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + FPRValueMapping, FPRValueMapping}); + break; + } + case G_MERGE_VALUES: { + // We only support G_MERGE_VALUES for creating a double precision floating + // point value out of two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || + Ty2.getSizeInBits() != 32) + return getInvalidInstructionMapping(); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } + case G_UNMERGE_VALUES: { + // We only support G_UNMERGE_VALUES for splitting a double precision + // floating point value into two GPRs. 
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || + Ty2.getSizeInBits() != 64) + return getInvalidInstructionMapping(); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); + break; + } + case G_BR: + OperandsMapping = getOperandsMapping({nullptr}); + break; + case G_BRCOND: + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; default: - return InstructionMapping{}; + return getInvalidInstructionMapping(); } - return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping, - NumOperands}; +#ifndef NDEBUG + for (unsigned i = 0; i < NumOperands; i++) { + for (const auto &Mapping : OperandsMapping[i]) { + assert( + (Mapping.RegBank->getID() != ARM::FPRRegBankID || + MF.getSubtarget<ARMSubtarget>().hasVFP2()) && + "Trying to use floating point register bank on target without vfp"); + } + } +#endif + + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping, + NumOperands); } diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h index 773920e..9650b35 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h @@ -16,26 +16,28 @@ #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#define GET_REGBANK_DECLARATIONS +#include "ARMGenRegisterBank.inc" + namespace llvm { class TargetRegisterInfo; -namespace ARM { -enum { - GPRRegBankID = 0, // General purpose registers - NumRegisterBanks, +class ARMGenRegisterBankInfo : public RegisterBankInfo { +#define GET_TARGET_REGBANK_CLASS +#include "ARMGenRegisterBank.inc" }; -} // end namespace ARM /// This class provides the information for the target register banks. -class ARMRegisterBankInfo final : public RegisterBankInfo { +class ARMRegisterBankInfo final : public ARMGenRegisterBankInfo { public: ARMRegisterBankInfo(const TargetRegisterInfo &TRI); const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td b/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td new file mode 100644 index 0000000..7cd2d60 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td @@ -0,0 +1,14 @@ +//=- ARMRegisterBanks.td - Describe the ARM Banks ------------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +def GPRRegBank : RegisterBank<"GPRB", [GPR, GPRwithAPSR]>; +def FPRRegBank : RegisterBank<"FPRB", [SPR, DPR]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td index 02cbfb1..b10583b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -245,6 +245,10 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { // the general GPR register class above (MOV, e.g.) def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; +// Thumb registers R0-R7 and the PC. Some instructions like TBB or TBH allow +// the PC to be used as a destination operand as well. +def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>; + // The high registers in thumb mode, R8-R15. def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index b7d2d34..53e012f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Instruction scheduling annotations for out-of-order CPUs. +// Instruction scheduling annotations for in-order and out-of-order CPUs. // These annotations are independent of the itinerary class defined below. // Here we define the subtarget independent read/write per-operand resources. // The subtarget schedule definitions will then map these to the subtarget's @@ -54,6 +54,9 @@ // } // def : ReadAdvance<ReadAdvanceALUsr, 3>; +//===----------------------------------------------------------------------===// +// Sched definitions for integer pipeline instructions +// // Basic ALU operation. def WriteALU : SchedWrite; def ReadALU : SchedRead; @@ -69,29 +72,84 @@ def WriteCMP : SchedWrite; def WriteCMPsi : SchedWrite; def WriteCMPsr : SchedWrite; -// Division. -def WriteDiv : SchedWrite; +// Multiplies. +def WriteMUL16 : SchedWrite; // 16-bit multiply. +def WriteMUL32 : SchedWrite; // 32-bit multiply. +def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg. +def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg. +def ReadMUL : SchedRead; + +// Multiply-accumulates. +def WriteMAC16 : SchedWrite; // 16-bit mac. +def WriteMAC32 : SchedWrite; // 32-bit mac. +def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg. +def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg. +def ReadMAC : SchedRead; + +// Divisions. +def WriteDIV : SchedWrite; -// Loads. +// Loads/Stores. def WriteLd : SchedWrite; def WritePreLd : SchedWrite; +def WriteST : SchedWrite; // Branches. def WriteBr : SchedWrite; def WriteBrL : SchedWrite; def WriteBrTbl : SchedWrite; -// Fixpoint conversions. -def WriteCvtFP : SchedWrite; - // Noop.
def WriteNoop : SchedWrite; +//===----------------------------------------------------------------------===// +// Sched definitions for floating-point and neon instructions +// +// Floating point conversions +def WriteFPCVT : SchedWrite; +def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa + +// ALU operations (32/64-bit) +def WriteFPALU32 : SchedWrite; +def WriteFPALU64 : SchedWrite; + +// Multiplication +def WriteFPMUL32 : SchedWrite; +def WriteFPMUL64 : SchedWrite; +def ReadFPMUL : SchedRead; // multiplier read +def ReadFPMAC : SchedRead; // accumulator read + +// Multiply-accumulate +def WriteFPMAC32 : SchedWrite; +def WriteFPMAC64 : SchedWrite; + +// Division +def WriteFPDIV32 : SchedWrite; +def WriteFPDIV64 : SchedWrite; + +// Square-root +def WriteFPSQRT32 : SchedWrite; +def WriteFPSQRT64 : SchedWrite; + +// Vector load and stores +def WriteVLD1 : SchedWrite; +def WriteVLD2 : SchedWrite; +def WriteVLD3 : SchedWrite; +def WriteVLD4 : SchedWrite; +def WriteVST1 : SchedWrite; +def WriteVST2 : SchedWrite; +def WriteVST3 : SchedWrite; +def WriteVST4 : SchedWrite; + + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); (void)TII; + const ARMSubtarget *STI = + static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo()); + (void)STI; }]>; def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; @@ -365,3 +423,5 @@ include "ARMScheduleA8.td" include "ARMScheduleA9.td" include "ARMScheduleSwift.td" include "ARMScheduleR52.td" +include "ARMScheduleA57.td" +include "ARMScheduleM3.td" diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA57.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA57.td new file mode 100644 index 0000000..525079d --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -0,0 +1,1471 @@ +//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// *** Common description and scheduling model parameters taken from AArch64 *** +// The Cortex-A57 is a traditional superscalar microprocessor with a +// conservative 3-wide in-order stage for decode and dispatch. Combined with the +// much wider out-of-order issue stage, this produced a need to carefully +// schedule micro-ops so that all three decoded each cycle are successfully +// issued as the reservation station(s) simply don't stay occupied for long. +// Therefore, IssueWidth is set to the narrower of the two at three, while still +// modeling the machine as out-of-order. + +def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>; +def IsCPSRDefinedAndPredicatedPred : + SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>; + +// Cortex A57 rev. 
r1p0 or later (false = r0px) +def IsR1P0AndLaterPred : SchedPredicate<[{false}]>; + +// If Addrmode3 contains register offset (not immediate) +def IsLdrAm3RegOffPred : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>; +// The same predicate with operand offset 2 and 3: +def IsLdrAm3RegOffPredX2 : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>; +def IsLdrAm3RegOffPredX3 : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>; + +// If Addrmode3 contains "minus register" +def IsLdrAm3NegRegOffPred : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>; +// The same predicate with operand offset 2 and 3: +def IsLdrAm3NegRegOffPredX2 : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>; +def IsLdrAm3NegRegOffPredX3 : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>; + +// Load, scaled register offset, not plus LSL2 +def IsLdstsoScaledNotOptimalPredX0 : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>; +def IsLdstsoScaledNotOptimalPred : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>; +def IsLdstsoScaledNotOptimalPredX2 : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>; + +// Load, scaled register offset +def IsLdstsoScaledPred : + SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>; +def IsLdstsoScaledPredX2 : + SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>; + +def IsLdstsoMinusRegPredX0 : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>; +def IsLdstsoMinusRegPred : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>; +def IsLdstsoMinusRegPredX2 : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>; + +// Load, scaled register offset +def IsLdrAm2ScaledPred : + SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>; + +// LDM, base reg in list +def IsLdmBaseRegInList : + SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>; + +class A57WriteLMOpsListType<list<SchedWriteRes> writes> { + list <SchedWriteRes> Writes = writes; + SchedMachineModel SchedModel = ?; +} + +// *** Common description and scheduling model parameters taken from AArch64 *** +// (AArch64SchedA57.td) +def CortexA57Model : SchedMachineModel { + let IssueWidth = 3; // 3-way decode and dispatch + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. + let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1>; // Type B micro-ops +def A57UnitI : ProcResource<2>; // Type I micro-ops +def A57UnitM : ProcResource<1>; // Type M micro-ops +def A57UnitL : ProcResource<1>; // Type L micro-ops +def A57UnitS : ProcResource<1>; // Type S micro-ops + +def A57UnitX : ProcResource<1>; // Type X micro-ops (F1) +def A57UnitW : ProcResource<1>; // Type W micro-ops (F0) + +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. 
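Before the variant definitions that follow, it may help to spell out what a SchedWriteVariant does: at lookup time, the first SchedVar whose predicate accepts the MachineInstr supplies the write resources. A hedged C++ paraphrase of the A57WriteMOVsi variant defined below (the real dispatch is table-generated; the constants are the latencies its SchedWriteRes entries carry):

    // Flag-setting shift-immediates go to the M pipe at 2 cycles; everything
    // else issues on an I pipe in 1 cycle.
    unsigned movsiLatency(const ARMBaseInstrInfo *TII, const MachineInstr &MI) {
      if (TII->isCPSRDefined(MI)) // IsCPSRDefinedPred -> A57Write_2cyc_1M
        return 2;
      return 1;                   // NoSchedPred -> A57Write_1cyc_1I
    }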
+ +include "ARMScheduleA57WriteRes.td" + +// To have "CompleteModel = 1", support of pseudos and special instructions +def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", + "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", + "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", + "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", + "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", + "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG", + "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>; + +def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; + +// Specific memory instrs +def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", + "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>; + +// coprocessor moves +def : InstRW<[WriteNoop, WriteNoop], (instregex + "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$", + "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$", + "(t2)?MSR(banked|i|_AR|_M)?$")>; + +// Deprecated instructions +def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; + +// Pseudos +def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", + "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", + "tLDRpci_pic", "t2SUBS_PC_LR", + "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", + "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", + "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", + "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", + "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", + "WIN__CHKSTK", "WIN__DBZCHK")>; + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>; + +// --- 3.2 Branch Instructions --- +// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ + +def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$", + "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>; +def : InstRW<[A57Write_1cyc_1B_1I], + (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>; +def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>; +// Pseudos +def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>; +def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr", + "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>; +def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>; + +// --- 3.3 Arithmetic and Logical Instructions --- +// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S}, +// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST + +def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>; + +// shift by register, conditional or unconditional +// TODO: according to the doc, conditional uses I0/I1, unconditional uses M +// Why more complex instruction uses more simple pipeline? +// May be an error in doc. +def A57WriteALUsi : SchedWriteVariant<[ + // lsl #2, lsl #1, or lsr #1. 
SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1M]> +]>; +def A57WriteALUsr : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1M]> +]>; +def A57WriteALUSsr : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1M]> +]>; +def A57ReadALUsr : SchedReadVariant<[ + SchedVar<IsPredicatedPred, [ReadDefault]>, + SchedVar<NoSchedPred, [ReadDefault]> +]>; +def : SchedAlias<WriteALUsi, A57WriteALUsi>; +def : SchedAlias<WriteALUsr, A57WriteALUsr>; +def : SchedAlias<WriteALUSsr, A57WriteALUSsr>; +def : SchedAlias<ReadALUsr, A57ReadALUsr>; + +def A57WriteCMPsr : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1M]> +]>; +def : SchedAlias<WriteCMP, A57Write_1cyc_1I>; +def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>; +def : SchedAlias<WriteCMPsr, A57WriteCMPsr>; + +// --- 3.4 Move and Shift Instructions --- +// Move, basic +// MOV{S}, MOVW, MVN{S} +def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)", + "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL", + "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>; + +// Move, shift by immed, setflags/no setflags +// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN +// setflags = isCPSRDefined +def A57WriteMOVsi : SchedWriteVariant<[ + SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1I]> +]>; +def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi", + "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi", + "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>; + +// shift by register, conditional or unconditional, setflags/no setflags +def A57WriteMOVsr : SchedWriteVariant<[ + SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>, + SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1I]> +]>; +def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs", + "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr", + "(t2|t)RORrr")>; + +// Move, top +// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later +def A57WriteMOVT : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1M]> +]>; +def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>; + +def A57WriteI2pc : + WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>; +def A57WriteI2ld : + WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>; +def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>; +def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; + +// +2cyc for branch forms +def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>; + +// --- 3.5 Divide and Multiply Instructions --- +// Divide: SDIV, UDIV +// latency from documentation: 4-20, maximum taken +def : SchedAlias<WriteDIV, A57Write_20cyc_1M>; +// Multiply: tMUL not bound to common WriteRes types +def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>; +def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>; +def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>; +def : ReadAdvance<ReadMUL, 0>; + +// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, +// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R} +// Multiply-accumulate pipelines support late-forwarding of accumulate operands +// from similar μops, allowing a typical sequence of multiply-accumulate μops +// to issue one every cycle (sched advance = 2). +def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; + +def : SchedAlias<WriteMAC16, A57WriteMLA>; +def : SchedAlias<WriteMAC32, A57WriteMLA>; +def : SchedAlias<ReadMAC, A57ReadMLA>; + +def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>; +def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>; + +// Multiply long: SMULL, UMULL +def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>; +def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>; + +// --- 3.6 Saturating and Parallel Arithmetic Instructions --- +// Parallel arith +// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8 +// Conditional GE-setting instructions require three extra μops +// and two additional cycles to conditionally update the GE field. +def A57WriteParArith : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]> +]>; +def : InstRW< [A57WriteParArith], (instregex + "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)", + "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>; + +// Parallel arith with exchange: SASX, SSAX, UASX, USAX +def A57WriteParArithExch : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>, + SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]> +]>; +def : InstRW<[A57WriteParArithExch], + (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>; + +// Parallel halving arith +// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex + "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)", + "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>; + +// Parallel halving arith with exchange +// SHASX, SHSAX, UHASX, UHSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX", + "(t2)?UHASX", "(t2)?UHSAX")>; + +// Parallel saturating arith +// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)", + "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>; + +// Parallel saturating arith with exchange +// QASX, QSAX, UQASX, UQSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX", + "(t2)?UQASX", "(t2)?UQSAX")>; + +// Saturate: SSAT, SSAT16, USAT, USAT16 +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>; + +// Saturating arith: QADD, QSUB +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>; + +// Saturating doubling arith: QDADD, QDSUB +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>; + +// --- 3.7 Miscellaneous Data-Processing Instructions --- +// Bit field extract: SBFX, UBFX +def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>; + +// Bit field insert/clear: BFI, BFC +def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>; + +// Select bytes, conditional/unconditional +def A57WriteSEL : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1I]> +]>; +def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>; + +// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH +def : InstRW<[A57Write_1cyc_1I], + (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>; + +// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>; + +// Sign/zero extend and add, parallel:
SXTAB16, UXTAB16 +def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>; + +// Sum of absolute differences: USAD8, USADA8 +def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>; + +// --- 3.8 Load Instructions --- + +// Load, immed offset +// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate +def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12", + "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)", + "PICLDR", "tLDR")>; + +def : InstRW<[A57Write_4cyc_1L], + (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>; + +// For "Load, register offset, minus" we need +1cyc, +1I +def A57WriteLdrAm3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>; +def A57WriteLdrAm3X2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>; +def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>; + +def A57WriteLdrAmLDSTSO : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>, + SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>; + +def A57WrBackOne : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def A57WrBackTwo : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def A57WrBackThree : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} + +// --- LDR pre-indexed --- +// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update) +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM", + "LDRB_PRE_IMM", "t2LDRB_PRE")>; + +// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update) +// (5 cyc load result for not-lsl2 scaled) +def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> +]>; +def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], + (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>; + +def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], + (instregex "LDR(H|SH|SB)_PRE")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_PRE")>; + +// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm. 
+def A57WriteLdrDAm3Pre : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> +]>; +def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack], + (instregex "LDRD_PRE")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_PRE")>; + +// --- LDR post-indexed --- +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM", + "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>; + +def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], + (instregex "LDR(H|SH|SB)_POST")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_POST")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG", + "LDRB_POST_REG", "LDR(B?)T_POST$")>; + +def A57WriteLdrTRegPost : SchedWriteVariant<[ + SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> +]>; +def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>, + SchedVar<NoSchedPred, [A57WrBackTwo]> +]>; +// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L" +def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], + (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>; + +def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm. 
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_POST")>; + +// --- Preload instructions --- +// Preload, immed offset +def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12", + "t2PLDW?(i8|pci|s)", "(t2)?PLI")>; + +// Preload, register offset, +// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2 +// otherwise 4cyc "L" +def A57WritePLD : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>, + SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>; + +// --- Load multiple instructions --- +foreach NumAddr = 1-8 in { + def A57LMAddrPred#NumAddr : + SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>; +} + +def A57LDMOpsListNoregin : A57WriteLMOpsListType< + [A57Write_3cyc_1L, A57Write_3cyc_1L, + A57Write_4cyc_1L, A57Write_4cyc_1L, + A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L]>; +def A57WriteLDMnoreginlist : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>, + SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]> +]> { let Variadic=1; } + +def A57LDMOpsListRegin : A57WriteLMOpsListType< + [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>; +def A57WriteLDMreginlist : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>, + SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]> +]> { let Variadic=1; } + +def A57LDMOpsList_Upd : A57WriteLMOpsListType< + [A57WrBackOne, + A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I, + A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>; +def A57WriteLDM_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>, + SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>, + 
SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>, + SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>, + SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>, + SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>, + SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>, + SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>, + SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]> +]> { let Variadic=1; } + +def A57WriteLDM : SchedWriteVariant<[ + SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>, + SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]> +]> { let Variadic=1; } + +def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>; + +// TODO: no writeback latency defined in documentation (implemented as 1 cyc) +def : InstRW<[A57WriteLDM_Upd], + (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; + +// --- 3.9 Store Instructions --- + +// Store, immed offset +def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", + "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; + +// Store, register offset +// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S", +// otherwise 1cyc S. +def A57WriteStrAmLDSTSO : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>, + SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S]> +]>; +def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>; + +// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg. +def A57WriteStrAm3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S]> +]>; +def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>; +def A57WriteStrAm3X2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S]> +]>; +def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>; + +// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback) +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM", + "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)", + "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>; + +// Store, register pre-indexed: +// 1(1) "S, I0/I1" for plus reg +// 3(2) "I0/I1, S" for minus reg +// 1(2) "S, M" for scaled plus lsl2 +// 3(2) "I0/I1, S" for other scaled +def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> +]>; +def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>, + SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], + (instregex "STR_PRE_REG", "STRB_PRE_REG")>; + +// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE) +// 1(1) "S, I0/I1" for imm or reg plus +// 3(2) "I0/I1, S" for reg minus +def A57WriteStrAm3PreX2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> +]>; +def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], + (instregex "STRH_PRE")>; + +def 
A57WriteStrAm3PreX3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> +]>; +def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], + (instregex "STRD_PRE")>; + +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM", + "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>; + +// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not) +def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG", + "STRB(T?)_POST_REG", "STR(B?)T_POST$")>; + +// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr +// 1(1) "S, I0/I1" both for reg or imm +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>; + +// --- Store multiple instructions --- +// TODO: no writeback latency defined in documentation +def A57WriteSTM : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S]> +]>; +def A57WriteSTM_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> +]>; + +def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; +def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], + (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; + +// --- 3.10 FP Data Processing Instructions --- +def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; +def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; + +// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional +def A57WriteVcmp : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>, + SchedVar<NoSchedPred, [A57Write_3cyc_1X]> +]>; +def : InstRW<[A57WriteVcmp], + (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; + +// fp convert +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; + +def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; + +// FP round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; + +// FP divide, FP square root +def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; +def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; +def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; +def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; + +// FP max/min +def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; + +// FP multiply-accumulate pipelines support late forwarding of the result +// from FP multiply μops to the accumulate operands of an +// FP multiply-accumulate μop. 
The latter can potentially be issued 1 cycle +// after the FP multiply μop has been issued. +// FP multiply, FZ +def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; } + +def : SchedAlias<WriteFPMUL32, A57WriteVMUL>; +def : SchedAlias<WriteFPMUL64, A57WriteVMUL>; +def : ReadAdvance<ReadFPMUL, 0>; + +// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate +// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS +def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } + +// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.) +// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.) +// Currently, there is no way to define different read advances for the VFMA +// operand coming from VFMA or from VMUL, so there will be a read advance of 5. +// Zero latency (instead of one) for VMUL->VFMA shouldn't break anything. +// The same situation applies to ASIMD VMUL/VFMA instructions. +// def A57ReadVFMA : SchedRead; +// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>; +// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>; +def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>; + +def : SchedAlias<WriteFPMAC32, A57WriteVFMA>; +def : SchedAlias<WriteFPMAC64, A57WriteVFMA>; +def : SchedAlias<ReadFPMAC, A57ReadVFMA5>; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; + +// --- 3.11 FP Miscellaneous Instructions --- +// VMOV: 3cyc "F0/F1" for imm/reg +def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; + +// 5cyc L for FP transfer, vfp to core reg, +// 5cyc L for FP transfer, core reg to vfp +def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>; +// VMOVRRS/VMOVRRD are declared in common code with one WriteFPMOV (instead of 2).
+def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>; + +// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg +def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>; + +// --- 3.12 FP Load Instructions --- +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>; + +// FP load multiple (VLDM) + +def A57VLDMOpsListUncond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_11cyc_1L, + A57Write_12cyc_1L, A57Write_12cyc_1L]>; +def A57WriteVLDMuncond : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]> +]> { let Variadic=1; } + +def A57VLDMOpsListCond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_12cyc_1L, + A57Write_13cyc_1L, A57Write_14cyc_1L, + A57Write_15cyc_1L, A57Write_16cyc_1L, + A57Write_17cyc_1L, A57Write_18cyc_1L, + A57Write_19cyc_1L, A57Write_20cyc_1L]>; +def A57WriteVLDMcond : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]> +]> { let Variadic=1; } + +def A57WriteVLDM : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>, + SchedVar<NoSchedPred, [A57WriteVLDMuncond]> +]> { let Variadic=1; } + +def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>; + +def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I, + A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>; +def A57WriteVLDMuncond_UPD : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>, + 
SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]> +]> { let Variadic=1; } + +def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I, + A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I, + A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I, + A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I, + A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>; +def A57WriteVLDMcond_UPD : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]> +]> { let Variadic=1; } + +def A57WriteVLDM_UPD : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>, + SchedVar<NoSchedPred, [A57WriteVLDMuncond_UPD]> +]> { let Variadic=1; } + +def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD], + (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>; + +// --- 3.13 FP Store Instructions --- +def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>; + +def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>; + +def A57WriteVSTMs : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S]> +]>; +def A57WriteVSTMd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>, + SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>, + SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>, + SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>, + SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>, + SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>, + SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>, + SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1S]> +]>; +def A57WriteVSTMs_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> +]>; +def A57WriteVSTMd_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>, + SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>, + SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>, + SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>, + SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>, + SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>, + SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>, + 
SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> +]>; + +def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>; +def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd], + (instregex "VSTM(SIA_UPD|SDB_UPD)")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd], + (instregex "VSTM(DIA_UPD|DDB_UPD)")>; + +// --- 3.14 ASIMD Integer Instructions --- + +// ASIMD absolute diff, 3cyc F0/F1 for integer VABD +def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>; + +// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form +def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>; +def : InstRW<[A57WriteVABAD, A57ReadVABAD], + (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>; +def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>; +def : InstRW<[A57WriteVABAQ, A57ReadVABAQ], + (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>; + +// ASIMD absolute diff accum long: 4(1) F1 for VABAL +def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>; +def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>; + +// ASIMD absolute diff long: 3cyc F0/F1 for VABDL +def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>; + +// ASIMD arith, basic +def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW", + "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)", + "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>; + +// ASIMD arith, complex +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB", + "VQABS", "VQADD", "VQNEG", "VQSUB", + "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>; + +// ASIMD compare +def : InstRW<[A57Write_3cyc_1V], + (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>; + +// ASIMD logical +def : InstRW<[A57Write_3cyc_1V], + (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>; + +// ASIMD max/min +def : InstRW<[A57Write_3cyc_1V], + (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>; + +// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply +// and multiply-with-accumulate instructions relative to r0pX. 
+def A57WriteVMULD_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def : InstRW<[A57WriteVMULD_VecInt], (instregex + "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)", + "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>; + +// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later +def A57WriteVMULQ_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>; +def : InstRW<[A57WriteVMULQ_VecInt], (instregex + "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)", + "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>; + +// ASIMD multiply accumulate, D-form +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAD_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def A57ReadVMLAD_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]> +]>; +def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt], + (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>; + +// ASIMD multiply accumulate, Q-form +// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAQ_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>; +def A57ReadVMLAQ_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]> +]>; +def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt], + (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>; + +// ASIMD multiply accumulate long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAL_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def A57ReadVMLAL_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]> +]>; +def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt], + (instregex "VMLAL(s|u)", "VMLSL(s|u)")>; + +// ASIMD multiply accumulate saturating long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence +// (3 or 2 ReadAdvance) +def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def A57ReadVQDMLAL_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]> +]>; +def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], + (instregex "VQDMLAL", "VQDMLSL")>; + +// ASIMD multiply long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +def A57WriteVMULL_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def : InstRW<[A57WriteVMULL_VecInt], + (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>; + +// ASIMD pairwise add and accumulate +// 4cyc F1, 1cyc for accumulate sequence 
(3cyc ReadAdvance) +def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>; +def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>; + +// ASIMD shift accumulate +// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance) +def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>; +def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>; + +// ASIMD shift by immed, basic +def : InstRW<[A57Write_3cyc_1X], + (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>; + +// ASIMD shift by immed, complex +def : InstRW<[A57Write_4cyc_1X], (instregex + "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)", + "VRSHRN")>; + +// ASIMD shift by immed and insert, basic, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex + "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by immed and insert, basic, Q-form +def : InstRW<[A57Write_5cyc_1X], (instregex + "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, basic, D-form +def : InstRW<[A57Write_3cyc_1X], (instregex + "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by register, basic, Q-form +def : InstRW<[A57Write_4cyc_1X], (instregex + "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, complex, D-form +// VQRSHL, VQSHL, VRSHL +def : InstRW<[A57Write_4cyc_1X], (instregex + "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", + "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by register, complex, Q-form +def : InstRW<[A57Write_5cyc_1X], (instregex + "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", + "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; + +// --- 3.15 ASIMD Floating-Point Instructions --- +// ASIMD FP absolute value +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>; + +// ASIMD FP arith +def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", + "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; + +// ASIMD FP compare +def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", + "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; + +// ASIMD FP convert, integer +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)", + "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)", + "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>; + +// ASIMD FP convert, half-precision: 8cyc F0/F1 +def : InstRW<[A57Write_8cyc_1V], (instregex + "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)", + "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", + "VCVT(f2h|h2f)")>; + +// ASIMD FP max/min +def : InstRW<[A57Write_5cyc_1V], (instregex + "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>; + +// ASIMD FP multiply +def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; + +// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence +def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57ReadVMLA_VecFP : + SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>; +def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP], + (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>; + +// ASIMD FP negate +def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>; + +// ASIMD FP 
round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex + "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>; + +// --- 3.16 ASIMD Miscellaneous Instructions --- + +// ASIMD bitwise insert +def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>; + +// ASIMD count +def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>; + +// ASIMD duplicate, core reg: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>; + +// ASIMD duplicate, scalar: 3cyc "F0/F1" +def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>; + +// ASIMD extract +def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; + +// ASIMD move, immed +def : InstRW<[A57Write_3cyc_1V], (instregex + "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", + "VMOVQ0")>; + +// ASIMD move, narrowing +def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; + +// ASIMD move, saturating +def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>; + +// ASIMD reciprocal estimate +def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>; + +// ASIMD reciprocal step, FZ +def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>; + +// ASIMD reverse, swap, table lookup (1-2 reg) +def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>; + +// ASIMD table lookup (3-4 reg) +def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>; + +// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1" +def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>; + +// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>; + +// ASIMD transpose +def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>; + +// ASIMD unzip/zip, D-form +def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], + (instregex "VUZPd", "VZIPd")>; + +// ASIMD unzip/zip, Q-form +def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V], + (instregex "VUZPq", "VZIPq")>; + +// --- 3.17 ASIMD Load Instructions --- + +// Overridden via InstRW for this processor.
+def : WriteRes<WriteVLD1, []>; +def : WriteRes<WriteVLD2, []>; +def : WriteRes<WriteVLD3, []>; +def : WriteRes<WriteVLD4, []>; +def : WriteRes<WriteVST1, []>; +def : WriteRes<WriteVST2, []>; +def : WriteRes<WriteVST3, []>; +def : WriteRes<WriteVST4, []>; + +// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency +def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>; +def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne], + (instregex "VLD1(d|q)(8|16|32|64)wb")>; + +// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency +def : InstRW<[A57Write_6cyc_1L], + (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>; + +def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne], + (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>; + +// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex + "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex + "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], + (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>; + +// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2b(8|16|32)wb")>; + +// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$", + "VLD2LN(d|q)(8|16|32)Pseudo$")>; +// 2 results + wb result +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne], + (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>; +// 1 result + wb result +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb", + "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1" +// 3 results +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3(d|q)(8|16|32)$")>; +// 1 result +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>; +// 3 results + wb +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)_UPD$")>; +// 1 result + wb +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3LN(d|q)32$", + "VLD3LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3LN(d|q)(8|16)$", + "VLD3LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex 
"VLD3LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3DUP(d|q)(8|16|32)$", + "VLD3DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4LN(d|q)32$", + "VLD4LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4LN(d|q)(8|16)$", + "VLD4LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4DUP(d|q)(8|16|32)$", + "VLD4DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.18 ASIMD Store Instructions --- + +// ASIMD store, 1 element, multiple, 1 reg: 1cyc S +def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "VST1d(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 2 reg: 2cyc S +def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I], + (instregex "VST1q(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 3 reg: 3cyc S +def : InstRW<[A57Write_3cyc_1S], + (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I], + (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; +// ASIMD store, 1 element, multiple, 4 reg: 4cyc S +def : InstRW<[A57Write_4cyc_1S], + (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I], + (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; +// ASIMD store, 1 element, one 
lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; +// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2(d|b)(8|16|32)$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2(b|d)(8|16|32)wb")>; +// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S" +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; +// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2LN(d|q)(8|16|32)_UPD", + "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 3 element, multiple, 3 reg +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3(d|q)(8|16|32)_UPD", + "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 3 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3LN(d|q)(8|16|32)_UPD", + "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 4 element, multiple, 4 reg +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST4(d|q)(8|16|32)_UPD", + "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 4 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST4LN(d|q)(8|16|32)_UPD", + "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.19 Cryptography Extensions --- +// Crypto AES ops +// AESD, AESE, AESIMC, AESMC: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>; +// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>; +// Crypto SHA1 xor ops: 6cyc F0/F1 +def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; +// Crypto SHA1 fast ops: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; +// Crypto SHA1 slow ops: 6cyc F0 +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; +// Crypto SHA256 fast ops: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; +// Crypto SHA256 slow ops: 6cyc F0 +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; + +// --- 3.20 CRC --- +def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>; + +// ----------------------------------------------------------------------------- +// Common definitions +def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } +def : SchedAlias<WriteALU, A57Write_1cyc_1I>; + +def : SchedAlias<WriteBr, A57Write_1cyc_1B>; +def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>; +def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>; +def : SchedAlias<WritePreLd, A57Write_4cyc_1L>; + +def : SchedAlias<WriteLd, A57Write_4cyc_1L>; +def : SchedAlias<WriteST, A57Write_1cyc_1S>; 
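+ +// Illustrative note: ReadAdvance<SchedRead, N> lets a consumer of that operand +// issue as if its producer completed N cycles earlier, so the 0 below models +// no forwarding benefit for ALU reads. A hypothetical +// def ExampleReadAcc : SchedReadAdvance<2, [A57WriteMLA]>; +// would instead model the 2-cycle accumulate late-forwarding used for the +// multiply-accumulate writes in 3.5 above.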
+def : ReadAdvance<ReadALU, 0>; + +} // SchedModel = CortexA57Model + diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td new file mode 100644 index 0000000..670717d --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td @@ -0,0 +1,323 @@ +//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming convention is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +//   Prefix: A57Write +//   Latency: #cyc +//   MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued as follows: one to I pipe, six to S pipes and +// four to V pipes. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; + let ResourceCycles = [17]; } +def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; + let ResourceCycles = [18]; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; + let ResourceCycles = [19]; } +def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20; + let ResourceCycles = [20]; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; } +def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; } +def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; + let ResourceCycles = [35]; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } + +// A57Write_3cyc_1L - A57Write_20cyc_1L +foreach Lat = 3-20 in { + def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> { + let Latency = Lat; + } +} + +// A57Write_4cyc_1S - A57Write_16cyc_1S +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> { +
let Latency = Lat; + } +} + +def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; } +def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; } +def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; } +def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 64; + let NumMicroOps = 2; + let ResourceCycles = [32, 32]; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS, + A57UnitM]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def
A57Write_3cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 36; + let NumMicroOps = 2; + let ResourceCycles = [18, 18]; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I +foreach Lat = 3-20 in { + def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 9; + let NumMicroOps = 3; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 519e595..4e72b13 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1944,6 +1944,16 @@ def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5; def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; let NumMicroOps = 0; } +def : SchedAlias<WriteMUL16, A9WriteM16>; +def : SchedAlias<WriteMUL32, A9WriteM>; +def :
SchedAlias<WriteMUL64Lo, A9WriteM>; +def : SchedAlias<WriteMUL64Hi, A9WriteMHi>; +def : SchedAlias<WriteMAC16, A9WriteM16>; +def : SchedAlias<WriteMAC32, A9WriteM>; +def : SchedAlias<WriteMAC64Lo, A9WriteM>; +def : SchedAlias<WriteMAC64Hi, A9WriteMHi>; +def : ReadAdvance<ReadMUL, 0>; +def : ReadAdvance<ReadMAC, 0>; // Floating-point // Only one FP or AGU instruction may issue per cycle. We model this @@ -1953,6 +1963,7 @@ def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; } + def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; } def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; } @@ -1970,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } +def : WriteRes<WriteVLD1, []>; +def : WriteRes<WriteVLD2, []>; +def : WriteRes<WriteVLD3, []>; +def : WriteRes<WriteVLD4, []>; +def : WriteRes<WriteVST1, []>; +def : WriteRes<WriteVST2, []>; +def : WriteRes<WriteVST3, []>; +def : WriteRes<WriteVST4, []>; + // Reserve A9UnitFP for 2 consecutive cycles. def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; @@ -1992,6 +2012,7 @@ def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; } // Load Integer. def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; } +def : SchedAlias<WriteLd, A9WriteL>; // Load the upper 32-bits using the same micro-op. def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3; let NumMicroOps = 0; } @@ -2471,6 +2492,34 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>; def : SchedAlias<WriteALUSsr, A9WriteALUsr>; def : SchedAlias<ReadALU, A9ReadALU>; def : SchedAlias<ReadALUsr, A9ReadALU>; +def : SchedAlias<WriteST, A9WriteS>; + +// ===---------------------------------------------------------------------===// +// Floating-point. Map target defined SchedReadWrite to processor specific ones +// +def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; } +def : SchedAlias<WriteFPMOV, A9WriteFMov>; + +def : SchedAlias<WriteFPALU32, A9WriteF>; +def : SchedAlias<WriteFPALU64, A9WriteF>; + +def : SchedAlias<WriteFPMUL32, A9WriteFMulS>; +def : SchedAlias<WriteFPMUL64, A9WriteFMulD>; + +def : SchedAlias<WriteFPMAC32, A9WriteFMAS>; +def : SchedAlias<WriteFPMAC64, A9WriteFMAD>; + +def : SchedAlias<WriteFPDIV32, A9WriteFDivS>; +def : SchedAlias<WriteFPDIV64, A9WriteFDivD>; +def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>; +def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>; + +def : ReadAdvance<ReadFPMUL, 0>; +def : ReadAdvance<ReadFPMAC, 0>; + +// ===---------------------------------------------------------------------===// +// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types. 
+// def : InstRW< [WriteALU], (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", "BICrr")>; @@ -2518,12 +2567,11 @@ def : InstRW<[A9WriteLb], "LDRH", "LDRSH", "LDRSB")>; def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>; -def : WriteRes<WriteDiv, []> { let Latency = 0; } +def : WriteRes<WriteDIV, []> { let Latency = 0; } def : WriteRes<WriteBr, [A9UnitB]>; def : WriteRes<WriteBrL, [A9UnitB]>; def : WriteRes<WriteBrTbl, [A9UnitB]>; def : WriteRes<WritePreLd, []>; -def : SchedAlias<WriteCvtFP, A9WriteF>; def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } } // SchedModel = CortexA9Model diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleM3.td b/contrib/llvm/lib/Target/ARM/ARMScheduleM3.td new file mode 100644 index 0000000..93f8299 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleM3.td @@ -0,0 +1,21 @@ +//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the ARM Cortex-M3 processor. +// +//===----------------------------------------------------------------------===// + +def CortexM3Model : SchedMachineModel { + let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue + let MicroOpBufferSize = 0; // In-order + let LoadLatency = 2; // Latency when not pipelined, not pc-relative + let MispredictPenalty = 2; // Best case branch taken cost + + let CompleteModel = 0; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td index 1b40742..782be9b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td @@ -70,15 +70,13 @@ def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; } def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; } def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; } +// Multiply - aliased to sub-target specific later + // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2) -def : WriteRes<WriteDiv, [R52UnitDiv]> { - let Latency = 8; let ResourceCycles = [8]; // not pipelined +def : WriteRes<WriteDIV, [R52UnitDiv]> { + let Latency = 8; let ResourceCycles = [8]; // non-pipelined } -// Loads -def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; } -def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; } - // Branches - LR written in Late EX2 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; } def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; } @@ -86,11 +84,50 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; } // Misc def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } -def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; } +// Integer pipeline by-passes def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS +def : ReadAdvance<ReadMUL, 0>; +def : ReadAdvance<ReadMAC, 0>; + +// Floating-point. 
Map target-defined SchedReadWrites to subtarget specific ones +def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; } + +def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> { + let Latency = 6; +} + +def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> { + let Latency = 11; // as it is internally two insns (MUL then ADD) +} + +def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL, + R52UnitFPALU, R52UnitFPALU]> { + let Latency = 11; +} + +def : WriteRes<WriteFPDIV32, [R52UnitDiv]> { + let Latency = 7; // FP div takes fixed #cycles + let ResourceCycles = [7]; // is not pipelined +} + +def : WriteRes<WriteFPDIV64, [R52UnitDiv]> { + let Latency = 17; + let ResourceCycles = [17]; +} + +def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; } +def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; } +// Overridden via InstRW for this processor. +def : WriteRes<WriteVST1, []>; +def : WriteRes<WriteVST2, []>; +def : WriteRes<WriteVST3, []>; +def : WriteRes<WriteVST4, []>; + +def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1 +def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1 //===----------------------------------------------------------------------===// // Subtarget-specific SchedReadWrites. @@ -106,6 +143,9 @@ def : ReadAdvance<R52Read_F2, 2>; // Cortex-R52 specific SchedWrites for use with InstRW def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; } +def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { + let Latency = 4; let NumMicroOps = 0; +} def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> { let Latency = 8; let ResourceCycles = [8]; // not pipelined } @@ -120,6 +160,19 @@ def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; } def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } +// Alias generics to sub-target specific +def : SchedAlias<WriteMUL16, R52WriteMAC>; +def : SchedAlias<WriteMUL32, R52WriteMAC>; +def : SchedAlias<WriteMUL64Lo, R52WriteMAC>; +def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>; +def : SchedAlias<WriteMAC16, R52WriteMAC>; +def : SchedAlias<WriteMAC32, R52WriteMAC>; +def : SchedAlias<WriteMAC64Lo, R52WriteMAC>; +def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>; +def : SchedAlias<WritePreLd, R52WriteLd>; +def : SchedAlias<WriteLd, R52WriteLd>; +def : SchedAlias<WriteST, R52WriteST>; + def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { let Latency = 4; @@ -147,19 +200,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL, def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } -def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> { - let Latency = 7; // FP div takes fixed #cycles - let ResourceCycles = [7]; // is not pipelined - } -def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> { - let Latency = 17; - let ResourceCycles = [17]; -} - - //===----------------------------------------------------------------------===// -// Subtarget-specific - map operands to SchedReadWrites +// Floating-point. 
Map target defined SchedReadWrites to processor specific ones +// +def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>; +def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>; +def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>; +def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>; +//===----------------------------------------------------------------------===// +// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types. +// def : InstRW<[WriteALU], (instrs COPY)>; def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], @@ -235,7 +286,7 @@ def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS], "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>; def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS], - (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + (instregex "t2SDIV", "t2UDIV")>; // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles. // However, that's non-trivial to specify, so we keep it uniform @@ -294,15 +345,6 @@ def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>; def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>; def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>; -//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>; -//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>; -//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>; - -//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>; -//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>; -//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>; - - // Integer Load, Multiple. foreach Lat = 3-25 in { def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { @@ -492,12 +534,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VAC def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>; def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>; -def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>; -def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>; - -def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], - (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>; - def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>; def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>; @@ -682,21 +718,24 @@ def R52WriteSTM : SchedWriteVariant<[ // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with // another instruction in slot-1, but only in the last issue. 
-def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;} -def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;} +def : WriteRes<WriteVLD2, [R52UnitLd]> { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2]; + let SingleIssue = 1; } -def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes<WriteVLD3, [R52UnitLd]> { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3]; + let SingleIssue = 1; } -def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes<WriteVLD4, [R52UnitLd]> { let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4]; + let SingleIssue = 1; } def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5; @@ -777,9 +816,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHS def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>; def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>; -def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>; def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>; -def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>; def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>; def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>; def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>; @@ -797,95 +835,6 @@ def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VR def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; //--- -// VLDx. 
Vector Loads -//--- -// 1-element structure load -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>; - -// 2-element structure load -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, 
R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>; - -// 3-element structure load -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - -// 4-element structure load -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - -//--- // VSTx. 
Vector Stores //--- // 1-element structure store diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td index ea2bf4b..b838688 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -133,6 +133,8 @@ let SchedModel = SwiftModel in { def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>; def : ReadAdvance<ReadALU, 0>; def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>; + def : SchedAlias<WriteLd, SwiftWriteP2ThreeCycle>; + def : SchedAlias<WriteST, SwiftWriteP2>; def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[ @@ -166,10 +168,10 @@ let SchedModel = SwiftModel in { def : InstRW<[SwiftWriteP01OneCycle2x_load], (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>; - def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>; + def SwiftWriteP0TwoCycleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>; def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[ - SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>, + SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCycleTwoUops ]>, SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]> ]>; @@ -282,6 +284,18 @@ let SchedModel = SwiftModel in { let ResourceCycles = [2, 3]; } + // Aliasing generic WriteRes to sub-target specific ones + def : SchedAlias<WriteMUL16, SwiftWriteP0FourCycle>; + def : SchedAlias<WriteMUL32, SwiftWriteP0FourCycle>; + def : SchedAlias<WriteMUL64Lo, SwiftP0P0P01FiveCycle>; + def : SchedAlias<WriteMUL64Hi, SwiftWrite5Cycle>; + def : SchedAlias<WriteMAC16, SwiftPredP0P01FourFiveCycle>; + def : SchedAlias<WriteMAC32, SwiftPredP0P01FourFiveCycle>; + def : SchedAlias<WriteMAC64Lo, SwiftWrite5Cycle>; + def : SchedAlias<WriteMAC64Hi, Swift2P03P01FiveCycle>; + def : ReadAdvance<ReadMUL, 0>; + def : SchedAlias<ReadMAC, SwiftReadAdvanceFourCyclesPred>; + // 4.2.15 Integer Multiply Accumulate, Long // 4.2.16 Integer Multiply Accumulate, Dual // 4.2.17 Integer Multiply Accumulate Accumulate, Long @@ -300,7 +314,7 @@ let SchedModel = SwiftModel in { let ResourceCycles = [1, 14]; } // 4.2.18 Integer Divide - def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround. + def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround. 
def : InstRW <[SwiftDiv], (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; @@ -310,7 +324,7 @@ let SchedModel = SwiftModel in { let Latency = 3; let NumMicroOps = 2; } - def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + def SwiftWriteP2P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { let Latency = 4; let NumMicroOps = 2; } @@ -343,7 +357,7 @@ let SchedModel = SwiftModel in { "tLDR(r|i|spi|pci|pciASM)")>; def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>; - def : InstRW<[SwiftWriteP2P01FourCyle], + def : InstRW<[SwiftWriteP2P01FourCycle], (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$", "t2LDRpci_pic", "tLDRS(B|H)")>; def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne], @@ -597,8 +611,6 @@ let SchedModel = SwiftModel in { def : InstRW<[SwiftWriteP1FourCycle], (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH", "VMULL", "VQDMULL")>; - def : InstRW<[SwiftWriteP1SixCycle], - (instregex "VMULD", "VNMULD")>; def : InstRW<[SwiftWriteP1FourCycle], (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)", "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>; @@ -607,8 +619,6 @@ let SchedModel = SwiftModel in { // 4.2.36 Advanced SIMD and VFP, Convert def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>; - // Fixpoint conversions. - def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; } // 4.2.37 Advanced SIMD and VFP, Move def : InstRW<[SwiftWriteP0TwoCycle], @@ -1036,6 +1046,40 @@ let SchedModel = SwiftModel in { def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>; def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>; + // ===---------------------------------------------------------------------===// + // Floating-point. Map target defined SchedReadWrite to processor specific ones + // + def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>; + def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>; + + def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>; + def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>; + + def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>; + def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>; + + def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>; + def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>; + + def : SchedAlias<WriteFPDIV32, SwiftDiv17>; + def : SchedAlias<WriteFPSQRT32, SwiftDiv17>; + + def : SchedAlias<WriteFPDIV64, SwiftDiv32>; + def : SchedAlias<WriteFPSQRT64, SwiftDiv32>; + + def : ReadAdvance<ReadFPMUL, 0>; + def : ReadAdvance<ReadFPMAC, 0>; + + // Overridden via InstRW for this processor. + def : WriteRes<WriteVLD1, []>; + def : WriteRes<WriteVLD2, []>; + def : WriteRes<WriteVLD3, []>; + def : WriteRes<WriteVLD4, []>; + def : WriteRes<WriteVST1, []>; + def : WriteRes<WriteVST2, []>; + def : WriteRes<WriteVST3, []>; + def : WriteRes<WriteVST4, []>; + // Not specified. def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; // Preload. 
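The A9, R52, and Swift hunks above all repeat the same restructuring: declare the processor's issue pipes as resources, define per-CPU SchedWriteRes types with a latency and micro-op count, alias the generic SchedReadWrites (WriteALU, WriteMUL32, ReadALU, ...) onto them, and keep InstRW overrides only for opcodes the generic mapping mis-models. A minimal TableGen sketch of that pattern follows; every name in it (HypoModel, HypoUnitALU, HypoWriteALU3) is hypothetical and not taken from the files in this diff, and it assumes the standard TargetSchedule.td and generic ARM SchedReadWrite definitions are in scope.

// Hypothetical machine model; field values are illustrative only.
def HypoModel : SchedMachineModel {
  let IssueWidth = 2;
  let MicroOpBufferSize = 0;   // in-order core
  let LoadLatency = 3;
  let CompleteModel = 0;       // unmapped opcodes are allowed
}

def HypoUnitALU : ProcResource<1>;   // a single ALU pipe

let SchedModel = HypoModel in {
  // Subtarget-specific write: a 3-cycle op occupying the ALU pipe once.
  def HypoWriteALU3 : SchedWriteRes<[HypoUnitALU]> { let Latency = 3; }

  // Hook the generic SchedReadWrites used by the shared instruction
  // definitions into this processor's resources.
  def : SchedAlias<WriteALU, HypoWriteALU3>;
  def : ReadAdvance<ReadALU, 1>;     // operand may arrive 1 cycle late

  // Per-opcode override for an instruction the generic mapping mis-models.
  def : InstRW<[HypoWriteALU3], (instregex "t2ADDri")>;
}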
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 3b99762..33dcf9b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -95,7 +95,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( Entry.Node = Src; Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.isSExt = false; + Entry.IsSExt = false; Args.push_back(Entry); } else { Entry.Node = Src; @@ -114,11 +114,11 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) - .setCallee( - TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], - TLI->getPointerTy(DAG.getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -198,17 +198,18 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( return Chain; // Issue loads / stores for the trailing (1 - 3) bytes. + auto getRemainingValueType = [](unsigned BytesLeft) { + return (BytesLeft >= 2) ? MVT::i16 : MVT::i8; + }; + auto getRemainingSize = [](unsigned BytesLeft) { + return (BytesLeft >= 2) ? 2 : 1; + }; + unsigned BytesLeftSave = BytesLeft; i = 0; while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - + VT = getRemainingValueType(BytesLeft); + VTSize = getRemainingSize(BytesLeft); Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, dl, MVT::i32)), @@ -224,14 +225,8 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( i = 0; BytesLeft = BytesLeftSave; while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - + VT = getRemainingValueType(BytesLeft); + VTSize = getRemainingSize(BytesLeft); TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, dl, MVT::i32)), diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index e2df0bd..2c42a13 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -11,27 +11,43 @@ // //===----------------------------------------------------------------------===// +#include "ARM.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "ARMCallLowering.h" +#include "ARMLegalizerInfo.h" +#include "ARMRegisterBankInfo.h" +#endif #include "ARMSubtarget.h" #include "ARMFrameLowering.h" -#include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Attributes.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 
+#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <string> using namespace llvm; @@ -76,11 +92,6 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } -/// EnableExecuteOnly - Enables the generation of execute-only code on supported -/// targets -static cl::opt<bool> -EnableExecuteOnly("arm-execute-only"); - ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, StringRef FS) { ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); @@ -90,13 +101,41 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, return new ARMFrameLowering(STI); } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct ARMGISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), - GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(TM.Options), TM(TM), - FrameLowering(initializeFrameLowering(CPU, FS)), + CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), + TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. InstrInfo(isThumb1Only() @@ -104,7 +143,29 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM, *this), GISel() {} + TLInfo(TM, *this) { + assert((isThumb() || hasARMOps()) && + "Target must either be thumb or support ARM operations!"); + +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); + GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new ARMLegalizerInfo(*this)); + + auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo()); + + // FIXME: At this point, we can't rely on Subtarget having RBI. + // It's awkward to mix passing RBI and the Subtarget; should we pass + // TII/TRI as well? 
+ GISel->InstSelector.reset(createARMInstructionSelector( + *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI)); + + GISel->RegBankInfo.reset(RBI); +#endif + setGISelAccessor(*GISel); +} const CallLowering *ARMSubtarget::getCallLowering() const { assert(GISel && "Access to GlobalISel APIs not set"); @@ -148,11 +209,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isTargetDarwin()) { StringRef ArchName = TargetTriple.getArchName(); - unsigned ArchKind = llvm::ARM::parseArch(ArchName); - if (ArchKind == llvm::ARM::AK_ARMV7S) + unsigned ArchKind = ARM::parseArch(ArchName); + if (ArchKind == ARM::AK_ARMV7S) // Default to the Swift CPU when targeting armv7s/thumbv7s. CPUString = "swift"; - else if (ArchKind == llvm::ARM::AK_ARMV7K) + else if (ArchKind == ARM::AK_ARMV7K) // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. // ARMv7k does not use SjLj exception handling. CPUString = "cortex-a7"; @@ -200,12 +261,12 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // support in the assembler and linker to be used. This would need to be // fixed to fully support tail calls in Thumb1. // - // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take - // LR. This means if we need to reload LR, it takes an extra instructions, - // which outweighs the value of the tail call; but here we don't know yet - // whether LR is going to be used. Probably the right approach is to - // generate the tail call here and turn it back into CALL/RET in - // emitEpilogue if LR is used. + // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M + // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This + // means if we need to reload LR, it takes extra instructions, which outweighs + // the value of the tail call; but here we don't know yet whether LR is going + // to be used. We generate the tail call here and turn it back into CALL/RET + // in emitEpilogue if LR is used. 
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls, // but we need to make sure there are enough registers; the only valid @@ -274,6 +335,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexM3: case ExynosM1: case CortexR52: + case Kryo: break; case Krait: PreISelOperandLatencyAdjustment = 1; diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 8c8218d..e15b175 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -14,47 +14,93 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H - +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMISelLowering.h" -#include "ARMInstrInfo.h" #include "ARMSelectionDAGInfo.h" -#include "ARMSubtarget.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "Thumb1FrameLowering.h" -#include "Thumb1InstrInfo.h" -#include "Thumb2InstrInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <memory> #include <string> #define GET_SUBTARGETINFO_HEADER #include "ARMGenSubtargetInfo.inc" namespace llvm { + +class ARMBaseTargetMachine; class GlobalValue; class StringRef; -class TargetOptions; -class ARMBaseTargetMachine; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3, - CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73, - Krait, Swift, ExynosM1 + Others, + + CortexA12, + CortexA15, + CortexA17, + CortexA32, + CortexA35, + CortexA5, + CortexA53, + CortexA57, + CortexA7, + CortexA72, + CortexA73, + CortexA8, + CortexA9, + CortexM3, + CortexR4, + CortexR4F, + CortexR5, + CortexR52, + CortexR7, + ExynosM1, + Krait, + Kryo, + Swift }; enum ARMProcClassEnum { - None, AClass, RClass, MClass + None, + + AClass, + MClass, + RClass }; enum ARMArchEnum { - ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te, - ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r, - ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline, + ARMv2, + ARMv2a, + ARMv3, + ARMv3m, + ARMv4, + ARMv4t, + ARMv5, + ARMv5t, + ARMv5te, + ARMv5tej, + ARMv6, + ARMv6k, + ARMv6kz, + ARMv6m, + ARMv6sm, + ARMv6t2, + ARMv7a, + ARMv7em, + ARMv7m, + ARMv7r, + ARMv7ve, + ARMv81a, + ARMv82a, + ARMv8a, + ARMv8mBaseline, + ARMv8mMainline, ARMv8r }; @@ -162,16 +208,12 @@ protected: /// FP registers for VFPv3. bool HasD16 = false; - /// HasHardwareDivide - True if subtarget supports [su]div - bool HasHardwareDivide = false; + /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode + bool HasHardwareDivideInThumb = false; /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode bool HasHardwareDivideInARM = false; - /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack - /// instructions. - bool HasT2ExtractPack = false; - /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier /// instructions. 
bool HasDataBarrier = false; @@ -192,6 +234,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate = false; + /// CheapPredicableCPSRDef - If true, disable +1 predication cost + /// for instructions updating CPSR. Enabled for Cortex-A57. + bool CheapPredicableCPSRDef = false; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting /// movs with shifter operand (i.e. asr, lsl, lsr). bool AvoidMOVsShifterOperand = false; @@ -200,6 +246,11 @@ protected: /// avoid issue "normal" call instructions to callees which do not return. bool HasRetAddrStack = false; + /// HasBranchPredictor - True if the subtarget has a branch predictor. Having + /// a branch predictor or not changes the expected cost of taking a branch + /// which affects the choice of whether to use predicated instructions. + bool HasBranchPredictor = true; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension = false; @@ -239,6 +290,10 @@ protected: /// HasFPAO - if true, processor does positive address offset computation faster bool HasFPAO = false; + /// HasFuseAES - if true, processor executes back to back AES instruction + /// pairs faster. + bool HasFuseAES = false; + /// If true, if conversion may decide to leave some instructions unpredicated. bool IsProfitableToUnpredicate = false; @@ -310,6 +365,10 @@ protected: /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). bool UseSjLjEH = false; + /// Implicitly convert an instruction to a different one if its immediates + /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. + bool NegativeImmediates = true; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment = 4; @@ -362,6 +421,7 @@ public: unsigned getMaxInlineSizeThreshold() const { return 64; } + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); @@ -373,15 +433,19 @@ public: const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + const ARMBaseInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } + const ARMTargetLowering *getTargetLowering() const override { return &TLInfo; } + const ARMFrameLowering *getFrameLowering() const override { return FrameLowering.get(); } + const ARMBaseRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } @@ -451,19 +515,21 @@ public: bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } bool hasVirtualization() const { return HasVirtualization; } + bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } - bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } - bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } bool hasAcquireRelease() const { return HasAcquireRelease; } + bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); } + bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } @@ -490,8 +556,10 @@ public: bool nonpipelinedVFP() const { return NonpipelinedVFP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRetAddrStack() const { return HasRetAddrStack; } + bool hasBranchPredictor() const { return HasBranchPredictor; } bool hasMPExtension() const { return HasMPExtension; } bool hasDSP() const { return HasDSP; } bool useNaClTrap() const { return UseNaClTrap; } @@ -503,6 +571,10 @@ public: bool hasD16() const { return HasD16; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFuseAES() const { return HasFuseAES; } + /// \brief Return true if the CPU supports any kind of instruction fusion. + bool hasFusion() const { return hasFuseAES(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -561,9 +633,10 @@ public: TargetTriple.getEnvironment() == Triple::EABIHF || isTargetWindows() || isAAPCS16_ABI(); } + bool isTargetAndroid() const { return TargetTriple.isAndroid(); } - virtual bool isXRaySupported() const override; + bool isXRaySupported() const override; bool isAPCS_ABI() const; bool isAAPCS_ABI() const; @@ -588,6 +661,7 @@ public: bool useR7AsFramePointer() const { return isTargetDarwin() || (!isTargetWindows() && isThumb()); } + /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent /// to lr. This is always required on Thumb1-only targets, as the push and @@ -656,6 +730,7 @@ public: /// True if fast-isel is used. 
bool useFastISel() const; }; -} // End llvm namespace -#endif // ARMSUBTARGET_H +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 70c9567..c323a1d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -10,30 +10,47 @@ // //===----------------------------------------------------------------------===// -#include "ARMTargetMachine.h" #include "ARM.h" -#include "ARMCallLowering.h" -#include "ARMFrameLowering.h" -#include "ARMInstructionSelector.h" -#include "ARMLegalizerInfo.h" -#include "ARMRegisterBankInfo.h" +#include "ARMSubtarget.h" +#include "ARMMacroFusion.h" +#include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ExecutionDepsFix.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" +#include <cassert> +#include <memory> +#include <string> + using namespace llvm; static cl::opt<bool> @@ -57,91 +74,57 @@ static cl::opt<cl::boolOrDefault> EnableGlobalMerge("arm-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); +namespace llvm { + void initializeARMExecutionDepsFixPass(PassRegistry&); +} + extern "C" void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget()); + RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget()); RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget()); - RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget()); - RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget()); + RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget()); PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeGlobalISel(Registry); initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); + initializeARMConstantIslandsPass(Registry); + initializeARMExecutionDepsFixPass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return make_unique<TargetLoweringObjectFileMachO>(); + return llvm::make_unique<TargetLoweringObjectFileMachO>(); if (TT.isOSWindows()) - return make_unique<TargetLoweringObjectFileCOFF>(); - return make_unique<ARMElfTargetObjectFile>(); + return llvm::make_unique<TargetLoweringObjectFileCOFF>(); + return llvm::make_unique<ARMElfTargetObjectFile>(); } static ARMBaseTargetMachine::ARMABI computeTargetABI(const Triple &TT, StringRef CPU, const TargetOptions &Options) { - if (Options.MCOptions.getABIName() == "aapcs16") + StringRef ABIName = Options.MCOptions.getABIName(); + + if (ABIName.empty()) + ABIName = ARM::computeDefaultTargetABI(TT, CPU); + + if (ABIName == "aapcs16") return ARMBaseTargetMachine::ARM_ABI_AAPCS16; - else if (Options.MCOptions.getABIName().startswith("aapcs")) + else if (ABIName.startswith("aapcs")) return ARMBaseTargetMachine::ARM_ABI_AAPCS; - else if (Options.MCOptions.getABIName().startswith("apcs")) + else if (ABIName.startswith("apcs")) return ARMBaseTargetMachine::ARM_ABI_APCS; - assert(Options.MCOptions.getABIName().empty() && - "Unknown target-abi option!"); - - ARMBaseTargetMachine::ARMABI TargetABI = - ARMBaseTargetMachine::ARM_ABI_UNKNOWN; - - unsigned ArchKind = llvm::ARM::parseCPUArch(CPU); - StringRef ArchName = llvm::ARM::getArchName(ArchKind); - // FIXME: This is duplicated code from the front end and should be unified. - if (TT.isOSBinFormatMachO()) { - if (TT.getEnvironment() == llvm::Triple::EABI || - (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || - llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) { - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else if (TT.isWatchABI()) { - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; - } else { - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - } - } else if (TT.isOSWindows()) { - // FIXME: this is invalid for WindowsCE - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else { - // Select the default based on the platform. 
- switch (TT.getEnvironment()) { - case llvm::Triple::Android: - case llvm::Triple::GNUEABI: - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABI: - case llvm::Triple::MuslEABIHF: - case llvm::Triple::EABIHF: - case llvm::Triple::EABI: - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - break; - case llvm::Triple::GNU: - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - break; - default: - if (TT.isOSNetBSD()) - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - else - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - break; - } - } - - return TargetABI; + llvm_unreachable("Unhandled/unknown ABI Name!"); + return ARMBaseTargetMachine::ARM_ABI_UNKNOWN; } static std::string computeDataLayout(const Triple &TT, StringRef CPU, const TargetOptions &Options, bool isLittle) { auto ABI = computeTargetABI(TT, CPU, Options); - std::string Ret = ""; + std::string Ret; if (isLittle) // Little endian. @@ -219,49 +202,38 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM, OL), TargetABI(computeTargetABI(TT, CPU, Options)), - TLOF(createTLOF(getTargetTriple())), - Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { + TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) { // Default to triple-appropriate float ABI - if (Options.FloatABIType == FloatABI::Default) - this->Options.FloatABIType = - Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) { + if (TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABIHF || + TargetTriple.getEnvironment() == Triple::EABIHF || + TargetTriple.isOSWindows() || + TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) + this->Options.FloatABIType = FloatABI::Hard; + else + this->Options.FloatABIType = FloatABI::Soft; + } // Default to triple-appropriate EABI if (Options.EABIVersion == EABI::Default || Options.EABIVersion == EABI::Unknown) { // musl is compatible with glibc with regard to EABI version - if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI()) + if ((TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABI || + TargetTriple.getEnvironment() == Triple::MuslEABIHF) && + !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) this->Options.EABIVersion = EABI::GNU; else this->Options.EABIVersion = EABI::EABI5; } -} -ARMBaseTargetMachine::~ARMBaseTargetMachine() {} + initAsmInfo(); +} -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { -struct ARMGISelActualAccessor : public GISelAccessor { - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; -} // End anonymous namespace. 
-#endif +ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { @@ -294,24 +266,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); - -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); - GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new ARMLegalizerInfo()); - - auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo()); - - // FIXME: At this point, we can't rely on Subtarget having RBI. - // It's awkward to mix passing RBI and the Subtarget; should we pass - // TII/TRI as well? - GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI)); - - GISel->RegBankInfo.reset(RBI); -#endif - I->setGISelAccessor(*GISel); } return I.get(); } @@ -322,22 +276,6 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { }); } -void ARMTargetMachine::anchor() {} - -ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional<Reloc::Model> RM, - CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); - if (!Subtarget.hasARMOps()) - report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " - "support ARM mode execution!"); -} - -void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -345,9 +283,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ARMBETargetMachine::anchor() {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -355,51 +291,40 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void ThumbTargetMachine::anchor() {} - -ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); -} - -void ThumbLETargetMachine::anchor() {} - -ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ThumbBETargetMachine::anchor() {} - -ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 
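The hunks above flatten the ARM/Thumb target machine hierarchy and drop the eagerly-constructed Subtarget member: getSubtargetImpl(const Function &) now builds one ARMSubtarget per distinct target-cpu/target-features attribute pair and caches it in SubtargetMap. A minimal, self-contained sketch of that caching pattern follows; the types and the main() driver are illustrative stand-ins, not the actual LLVM classes.

#include <map>
#include <memory>
#include <string>

// Stand-in for ARMSubtarget; illustrative only.
struct SubtargetSketch {
  std::string CPU, FS;
  SubtargetSketch(std::string C, std::string F)
      : CPU(std::move(C)), FS(std::move(F)) {}
};

class TargetMachineSketch {
  // One subtarget per distinct CPU+features key, created on first use.
  mutable std::map<std::string, std::unique_ptr<SubtargetSketch>> SubtargetMap;

public:
  const SubtargetSketch *getSubtargetImpl(const std::string &CPU,
                                          const std::string &FS) const {
    auto &Entry = SubtargetMap[CPU + FS];
    if (!Entry)
      Entry = std::make_unique<SubtargetSketch>(CPU, FS);
    return Entry.get();
  }
};

int main() {
  TargetMachineSketch TM;
  // Repeated queries with the same attributes reuse the cached subtarget.
  return TM.getSubtargetImpl("cortex-a15", "+neon") ==
                 TM.getSubtargetImpl("cortex-a15", "+neon")
             ? 0
             : 1;
}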
namespace { + /// ARM Code Generator Pass Configuration Options. class ARMPassConfig : public TargetPassConfig { public: - ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM) + ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} ARMBaseTargetMachine &getARMTargetMachine() const { return getTM<ARMBaseTargetMachine>(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); + return DAG; + } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; @@ -413,17 +338,31 @@ public: void addPreSched2() override; void addPreEmitPass() override; }; -} // namespace + +class ARMExecutionDepsFix : public ExecutionDepsFix { +public: + static char ID; + ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {} + StringRef getPassName() const override { + return "ARM Execution Dependency Fix"; + } +}; +char ARMExecutionDepsFix::ID; + +} // end anonymous namespace + +INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", + "ARM Execution Dependency Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARMPassConfig(this, PM); + return new ARMPassConfig(*this, PM); } void ARMPassConfig::addIRPasses() { if (TM->Options.ThreadModel == ThreadModel::Single) addPass(createLowerAtomicPass()); else - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -438,7 +377,7 @@ void ARMPassConfig::addIRPasses() { // Match interleaved memory accesses to ldN/stN intrinsics. 
if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); } bool ARMPassConfig::addPreISel() { @@ -508,7 +447,7 @@ void ARMPassConfig::addPreSched2() { if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass()); - addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(new ARMExecutionDepsFix()); } // Expand some pseudo instructions into multiple instructions to allow diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index c6b70b9..22ce949 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -14,10 +14,14 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H -#include "ARMInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" +#include <memory> namespace llvm { @@ -32,7 +36,6 @@ public: protected: std::unique_ptr<TargetLoweringObjectFile> TLOF; - ARMSubtarget Subtarget; bool isLittle; mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap; @@ -43,8 +46,10 @@ public: CodeGenOpt::Level OL, bool isLittle); ~ARMBaseTargetMachine() override; - const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } const ARMSubtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. + const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } /// \brief Get the TargetIRAnalysis for this target. @@ -56,23 +61,15 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } -}; -/// ARM target machine. -/// -class ARMTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); - public: - ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); + bool isMachineVerifierClean() const override { + return false; + } }; -/// ARM little endian target machine. +/// ARM/Thumb little endian target machine. /// -class ARMLETargetMachine : public ARMTargetMachine { - void anchor() override; +class ARMLETargetMachine : public ARMBaseTargetMachine { public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -80,10 +77,9 @@ public: CodeGenOpt::Level OL); }; -/// ARM big endian target machine. +/// ARM/Thumb big endian target machine. /// -class ARMBETargetMachine : public ARMTargetMachine { - void anchor() override; +class ARMBETargetMachine : public ARMBaseTargetMachine { public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -91,41 +87,6 @@ public: CodeGenOpt::Level OL); }; -/// Thumb target machine. -/// Due to the way architectures are handled, this represents both -/// Thumb-1 and Thumb-2. 
-/// -class ThumbTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); -public: - ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); -}; - -/// Thumb little endian target machine. -/// -class ThumbLETargetMachine : public ThumbTargetMachine { - void anchor() override; -public: - ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Thumb big endian target machine. -/// -class ThumbBETargetMachine : public ThumbTargetMachine { - void anchor() override; -public: - ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index 625c428..88bab64 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -8,16 +8,19 @@ //===----------------------------------------------------------------------===// #include "ARMTargetObjectFile.h" +#include "ARMSubtarget.h" #include "ARMTargetMachine.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/IR/Mangler.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> + using namespace llvm; using namespace dwarf; @@ -27,9 +30,9 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { - const ARMTargetMachine &ARM_TM = static_cast<const ARMTargetMachine &>(TM); - bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; - genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); + const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM); + bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS; + // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(isAAPCS_ABI); @@ -40,16 +43,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0); - - // Make code section unreadable when in execute-only mode - if (genExecuteOnly) { - unsigned Type = ELF::SHT_PROGBITS; - unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE; - // Since we cannot modify flags for an existing section, we create a new - // section with the right flags, and use 0 as the unique ID for - // execute-only text - TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U); - } } const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( @@ -71,21 +64,27 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const { getContext()); } -MCSection * 
-ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO, - SectionKind SK, const TargetMachine &TM) const { +static bool isExecuteOnlyFunction(const GlobalObject *GO, SectionKind SK, + const TargetMachine &TM) { + if (const Function *F = dyn_cast<Function>(GO)) + if (TM.getSubtarget<ARMSubtarget>(*F).genExecuteOnly() && SK.isText()) + return true; + return false; +} + +MCSection *ARMElfTargetObjectFile::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const { // Set execute-only access for the explicit section - if (genExecuteOnly && SK.isText()) + if (isExecuteOnlyFunction(GO, SK, TM)) SK = SectionKind::getExecuteOnly(); return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM); } -MCSection * -ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO, - SectionKind SK, const TargetMachine &TM) const { +MCSection *ARMElfTargetObjectFile::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const { // Place the global in the execute-only text section - if (genExecuteOnly && SK.isText()) + if (isExecuteOnlyFunction(GO, SK, TM)) SK = SectionKind::getExecuteOnly(); return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM); diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h index 24e755d..bd7aa1c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -11,19 +11,17 @@ #define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCExpr.h" namespace llvm { -class MCContext; -class TargetMachine; - class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { - mutable bool genExecuteOnly = false; protected: - const MCSection *AttributesSection; + const MCSection *AttributesSection = nullptr; + public: ARMElfTargetObjectFile() - : TargetLoweringObjectFileELF(), AttributesSection(nullptr) { + : TargetLoweringObjectFileELF() { PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31; } @@ -47,4 +45,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2b6b36b..51b0fed 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,6 +15,24 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +bool ARMTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // To inline a callee, all features not in the whitelist must match exactly. + bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) == + (CalleeBits & ~InlineFeatureWhitelist); + // For features in the whitelist, the callee's features must be a subset of + // the caller's.
+ bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) == + (CalleeBits & InlineFeatureWhitelist); + return MatchExact && MatchSubset; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); @@ -92,7 +110,8 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, } -int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -310,7 +329,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } -int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { +int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. @@ -335,7 +355,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { return LT.first; } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -504,7 +524,7 @@ int ARMTTIImpl::getArithmeticInstrCost( } int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && @@ -529,12 +549,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); - Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); // vldN/vstN only support legal vector types of size 64 or 128 in bits. - if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) - return Factor; + // Accesses having vector types that are a multiple of 128 bits can be + // matched to more than one vldN/vstN instruction. + if (NumElts % Factor == 0 && + TLI->isLegalInterleavedAccessType(SubVecTy, DL)) + return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL); } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 3c83cd9..0695a4e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,9 +33,38 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMSubtarget *ST; const ARMTargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + // Currently the following features are excluded from InlineFeatureWhitelist. + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // Depending on whether they are set or unset, different + // instructions/registers are available.
For example, inlining a callee with + // -thumb-mode in a caller with +thumb-mode may cause the assembler to + // fail if the callee uses ARM-only instructions, e.g. in inline asm. + const FeatureBitset InlineFeatureWhitelist = { + ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, + ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, + ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, + ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, + ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, + ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, + ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, + ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, + ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, + ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, + ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, + ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, + ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, + ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, + ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding, + ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR, + ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp, + ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor, + ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, + ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, + ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates + }; const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -45,6 +74,9 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced @@ -82,7 +114,7 @@ public: return 13; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; @@ -98,9 +130,11 @@ public: int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -118,7 +152,7 @@ public: ArrayRef<const Value *> Args = ArrayRef<const Value *>()); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c243a2d..1129826 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include
"llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -39,10 +41,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetParser.h" @@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode( clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb", "Warn in ARM, emit implicit ITs in Thumb"))); +static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes", + cl::init(false)); + class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; @@ -540,6 +543,10 @@ public: // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + // Add build attributes based on the selected target. + if (AddBuildAttributes) + getTargetStreamer().emitTargetAttributes(STI); + // Not in an ITBlock to start with. ITState.CurPosition = ~0U; @@ -915,40 +922,37 @@ public: int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); return Val != -1; } - bool isFBits16() const { + + template<int64_t N, int64_t M> + bool isImmediate() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return Value >= 0 && Value <= 16; + return Value >= N && Value <= M; } - bool isFBits32() const { + template<int64_t N, int64_t M> + bool isImmediateS4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return Value >= 1 && Value <= 32; + return ((Value & 3) == 0) && Value >= N && Value <= M; + } + bool isFBits16() const { + return isImmediate<0, 17>(); + } + bool isFBits32() const { + return isImmediate<1, 33>(); } bool isImm8s4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; + return isImmediateS4<-1020, 1020>(); } bool isImm0_1020s4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ((Value & 3) == 0) && Value >= 0 && Value <= 1020; + return isImmediateS4<0, 1020>(); } bool isImm0_508s4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ((Value & 3) == 0) && Value >= 0 && Value <= 508; + return isImmediateS4<0, 508>(); } bool isImm0_508s4Neg() const { if (!isImm()) return false; @@ -958,27 +962,6 @@ public: // explicitly exclude zero. we want that to use the normal 0_508 version. 
return ((Value & 3) == 0) && Value > 0 && Value <= 508; } - bool isImm0_239() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 240; - } - bool isImm0_255() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 256; - } - bool isImm0_4095() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 4096; - } bool isImm0_4095Neg() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -986,145 +969,17 @@ public: int64_t Value = -CE->getValue(); return Value > 0 && Value < 4096; } - bool isImm0_1() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 2; - } - bool isImm0_3() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 4; - } bool isImm0_7() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 8; - } - bool isImm0_15() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 16; - } - bool isImm0_31() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 32; - } - bool isImm0_63() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 64; - } - bool isImm8() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value == 8; - } - bool isImm16() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value == 16; - } - bool isImm32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value == 32; - } - bool isShrImm8() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 8; - } - bool isShrImm16() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 16; - } - bool isShrImm32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 32; - } - bool isShrImm64() 
const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 64; - } - bool isImm1_7() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 8; - } - bool isImm1_15() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 16; - } - bool isImm1_31() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 32; + return isImmediate<0, 7>(); } bool isImm1_16() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 17; + return isImmediate<1, 16>(); } bool isImm1_32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 33; + return isImmediate<1, 32>(); } - bool isImm0_32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 33; - } - bool isImm0_65535() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 65536; + bool isImm8_255() const { + return isImmediate<8, 255>(); } bool isImm256_65535Expr() const { if (!isImm()) return false; @@ -1145,32 +1000,16 @@ public: return Value >= 0 && Value < 65536; } bool isImm24bit() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value <= 0xffffff; + return isImmediate<0, 0xffffff + 1>(); } bool isImmThumbSR() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 33; + return isImmediate<1, 33>(); } bool isPKHLSLImm() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 32; + return isImmediate<0, 32>(); } bool isPKHASRImm() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 32; + return isImmediate<0, 33>(); } bool isAdrLabel() const { // If we have an immediate that's not a constant, treat it as a label @@ -1187,6 +1026,15 @@ public: ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { + // If we have an immediate that's not a constant, treat it as an expression + // needing a fixup. 
+ if (isImm() && !isa<MCConstantExpr>(getImm())) { + // We want to avoid matching :upper16: and :lower16: as we want these + // expressions to match in isImm0_65535Expr() + const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(getImm()); + return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)); + } if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -1245,6 +1093,20 @@ public: return ARM_AM::getSOImmVal(Value) == -1 && ARM_AM::getSOImmVal(-Value) != -1; } + bool isThumbModImmNeg1_7() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int32_t Value = -(int32_t)CE->getValue(); + return 0 < Value && Value < 8; + } + bool isThumbModImmNeg8_255() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int32_t Value = -(int32_t)CE->getValue(); + return 7 < Value && Value < 256; + } bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } @@ -2035,6 +1897,20 @@ public: Inst.addOperand(MCOperand::createImm(Enc)); } + void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Val = -CE->getValue(); + Inst.addOperand(MCOperand::createImm(Val)); + } + + void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Val = -CE->getValue(); + Inst.addOperand(MCOperand::createImm(Val)); + } + void addBitfieldOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // Munge the lsb/width into a bitfield mask. @@ -2141,7 +2017,7 @@ public: // The operand is actually a t2_so_imm, but we have its bitwise // negation in the assembly source, so twiddle it here. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::createImm(~CE->getValue())); + Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue())); } void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const { @@ -2149,7 +2025,7 @@ public: // The operand is actually a t2_so_imm, but we have its // negation in the assembly source, so twiddle it here. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::createImm(-CE->getValue())); + Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue())); } void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const { @@ -4330,7 +4206,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { // If some specific flag is already set, it means that some letter is // present more than once, this is not acceptable. - if (FlagsVal == ~0U || (FlagsVal & Flag)) + if (Flag == ~0U || (FlagsVal & Flag)) return MatchOperand_NoMatch; FlagsVal |= Flag; } @@ -5373,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // Fall through for the Identifier case that is not a register or a // special name.
+ LLVM_FALLTHROUGH; } case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets @@ -5484,7 +5361,8 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { enum { COFF = (1 << MCObjectFileInfo::IsCOFF), ELF = (1 << MCObjectFileInfo::IsELF), - MACHO = (1 << MCObjectFileInfo::IsMachO) + MACHO = (1 << MCObjectFileInfo::IsMachO), + WASM = (1 << MCObjectFileInfo::IsWasm), }; static const struct PrefixEntry { const char *Spelling; @@ -5518,6 +5396,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { case MCObjectFileInfo::IsCOFF: CurrentFormat = COFF; break; + case MCObjectFileInfo::IsWasm: + CurrentFormat = WASM; + break; } if (~Prefix->SupportedFormats & CurrentFormat) { @@ -6301,10 +6182,6 @@ bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst, else if (ListContainsPC && ListContainsLR) return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), "PC and LR may not be in the register list simultaneously"); - else if (inITBlock() && !lastInITBlock() && ListContainsPC) - return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), - "instruction must be outside of IT block or the last " - "instruction in an IT block"); return false; } @@ -6366,6 +6243,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Warning(Loc, "predicated instructions should be in IT block"); } + // PC-setting instructions in an IT block, but not the last instruction of + // the block, are UNPREDICTABLE. + if (inExplicitITBlock() && !lastInITBlock() && isITBlockTerminator(Inst)) { + return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block"); + } + const unsigned Opcode = Inst.getOpcode(); switch (Opcode) { case ARM::LDRD: @@ -6676,6 +6559,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, break; } case ARM::MOVi16: + case ARM::MOVTi16: case ARM::t2MOVi16: case ARM::t2MOVTi16: { @@ -6977,6 +6861,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { bool ARMAsmParser::processInstruction(MCInst &Inst, const OperandVector &Operands, MCStreamer &Out) { + // Check if we have the wide qualifier, because if it's present we + // must avoid selecting a 16-bit thumb instruction. + bool HasWideQualifier = false; + for (auto &Op : Operands) { + ARMOperand &ARMOp = static_cast<ARMOperand&>(*Op); + if (ARMOp.isToken() && ARMOp.getToken() == ".w") { + HasWideQualifier = true; + break; + } + } + switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -7056,8 +6951,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && Inst.getOperand(1).getImm() <= 0xff && - !(static_cast<ARMOperand &>(*Operands[2]).isToken() && - static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w")) + !HasWideQualifier) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -7088,10 +6982,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, else if (Inst.getOpcode() == ARM::t2LDRConstPool) TmpInst.setOpcode(ARM::t2LDRpci); const ARMOperand &PoolOperand = - (static_cast<ARMOperand &>(*Operands[2]).isToken() && - static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ? - static_cast<ARMOperand &>(*Operands[4]) : - static_cast<ARMOperand &>(*Operands[3]); + (HasWideQualifier ? 
+ static_cast<ARMOperand &>(*Operands[4]) : + static_cast<ARMOperand &>(*Operands[3])); const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm(); // If SubExprVal is a constant we may be able to use a MOV if (isa<MCConstantExpr>(SubExprVal) && @@ -8232,10 +8125,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, case ARM::t2LSRri: case ARM::t2ASRri: { if (isARMLowRegister(Inst.getOperand(0).getReg()) && - Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && - !(static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8269,7 +8161,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg()) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8303,27 +8196,43 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, bool isNarrow = false; if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; - switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) { - default: llvm_unreachable("unexpected opcode!"); - case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break; - case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; - case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; - case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; - case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; - } + unsigned Shift = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); + bool isMov = false; + // MOV rd, rm, LSL #0 is actually a MOV instruction + if (Shift == ARM_AM::lsl && Amount == 0) { + isMov = true; + // The 16-bit encoding of MOV rd, rm, LSL #N is explicitly encoding T2 of + // MOV (register) in the ARMv8-A and ARMv8-M manuals, and immediate 0 is + // unpredictable in an IT block so the 32-bit encoding T3 has to be used + // instead. + if (inITBlock()) { + isNarrow = false; + } + newOpc = isNarrow ? ARM::tMOVSr : ARM::t2MOVr; + } else { + switch(Shift) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; + case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; + } + } if (Amount == 32) Amount = 0; TmpInst.setOpcode(newOpc); TmpInst.addOperand(Inst.getOperand(0)); // Rd - if (isNarrow) + if (isNarrow && !isMov) TmpInst.addOperand(MCOperand::createReg( Inst.getOpcode() == ARM::t2MOVSsi ? 
ARM::CPSR : 0)); TmpInst.addOperand(Inst.getOperand(1)); // Rn - if (newOpc != ARM::t2RRX) + if (newOpc != ARM::t2RRX && !isMov) TmpInst.addOperand(MCOperand::createImm(Amount)); TmpInst.addOperand(Inst.getOperand(3)); // CondCode TmpInst.addOperand(Inst.getOperand(4)); @@ -8515,11 +8424,10 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || !isARMLowRegister(Inst.getOperand(0).getReg()) || - (unsigned)Inst.getOperand(2).getImm() > 255 || - ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != 0)) || - (static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) + (Inst.getOperand(2).isImm() && + (unsigned)Inst.getOperand(2).getImm() > 255) || + Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) || + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? @@ -8548,8 +8456,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } if (!Transform || Inst.getOperand(5).getReg() != 0 || - (static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(ARM::tADDhirr); @@ -8667,12 +8574,10 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - (unsigned)Inst.getOperand(1).getImm() <= 255 && - ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && - Inst.getOperand(4).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(4).getReg() == 0)) && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + (Inst.getOperand(1).isImm() && + (unsigned)Inst.getOperand(1).getImm() <= 255) && + Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { // The operands aren't in the same order for tMOVi8... MCInst TmpInst; TmpInst.setOpcode(ARM::tMOVi8); @@ -8693,8 +8598,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; TmpInst.setOpcode(Inst.getOperand(4).getReg() ? 
ARM::tMOVSr : ARM::tMOVr); @@ -8716,8 +8620,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == 0 && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Illegal opcode!"); @@ -8816,11 +8719,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand &>(*Operands[3]).isToken() || - !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8856,11 +8756,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(2).getReg())) && (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand &>(*Operands[3]).isToken() || - !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8918,6 +8815,9 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR && inITBlock()) return Match_RequiresNotITBlock; + // LSL with zero immediate is not allowed in an IT block + if (Opc == ARM::tLSLri && Inst.getOperand(3).getImm() == 0 && inITBlock()) + return Match_RequiresNotITBlock; } else if (isThumbOne()) { // Some high-register supporting Thumb1 encodings only allow both registers // to be from r0-r7 when in Thumb2. @@ -8932,6 +8832,22 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_RequiresV6; } + // Before ARMv8 the rules for when SP is allowed in t2MOVr are more complex + // than the loop below can handle, so it uses the GPRnopc register class and + // we do SP handling here. 
+ if (Opc == ARM::t2MOVr && !hasV8Ops()) + { + // SP as both source and destination is not allowed + if (Inst.getOperand(0).getReg() == ARM::SP && + Inst.getOperand(1).getReg() == ARM::SP) + return Match_RequiresV8; + // When flags-setting SP as either source or destination is not allowed + if (Inst.getOperand(4).getReg() == ARM::CPSR && + (Inst.getOperand(0).getReg() == ARM::SP || + Inst.getOperand(1).getReg() == ARM::SP)) + return Match_RequiresV8; + } + for (unsigned I = 0; I < MCID.NumOperands; ++I) if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) { // rGPRRegClass excludes PC, and also excluded SP before ARMv8 @@ -8945,7 +8861,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } namespace llvm { -template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { +template <> inline bool IsCPSRDead<MCInst>(const MCInst *Instr) { return true; // In an assembly source, no need to second-guess } } @@ -8975,6 +8891,7 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const { // operands. We only care about Thumb instructions here, as ARM instructions // obviously can't be in an IT block. switch (Inst.getOpcode()) { + case ARM::tLDMIA: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: case ARM::t2LDMDB: @@ -9076,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } +std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); + static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, @@ -9088,6 +9007,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, PendConditionalInstruction, Out); + SMLoc ErrorLoc; + if (ErrorInfo < Operands.size()) { + ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + switch (MatchResult) { case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, @@ -9162,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction", + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = ARMMnemonicSpellCheck( + ((ARMOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); + } case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: @@ -9177,16 +9107,52 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv8 or later"); case Match_RequiresFlagSetting: return Error(IDLoc, "no flag-preserving variant of this instruction available"); - case Match_ImmRange0_15: { - SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + case Match_ImmRange0_1: + return Error(ErrorLoc, "immediate operand must be in the range [0,1]"); + case Match_ImmRange0_3: + return Error(ErrorLoc, "immediate operand must be in the range [0,3]"); + case Match_ImmRange0_7: + return Error(ErrorLoc, "immediate operand must be in the range [0,7]"); + case Match_ImmRange0_15: return Error(ErrorLoc, "immediate operand must be in 
the range [0,15]"); - } - case Match_ImmRange0_239: { - SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + case Match_ImmRange0_31: + return Error(ErrorLoc, "immediate operand must be in the range [0,31]"); + case Match_ImmRange0_32: + return Error(ErrorLoc, "immediate operand must be in the range [0,32]"); + case Match_ImmRange0_63: + return Error(ErrorLoc, "immediate operand must be in the range [0,63]"); + case Match_ImmRange0_239: return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); - } + case Match_ImmRange0_255: + return Error(ErrorLoc, "immediate operand must be in the range [0,255]"); + case Match_ImmRange0_4095: + return Error(ErrorLoc, "immediate operand must be in the range [0,4095]"); + case Match_ImmRange0_65535: + return Error(ErrorLoc, "immediate operand must be in the range [0,65535]"); + case Match_ImmRange1_7: + return Error(ErrorLoc, "immediate operand must be in the range [1,7]"); + case Match_ImmRange1_8: + return Error(ErrorLoc, "immediate operand must be in the range [1,8]"); + case Match_ImmRange1_15: + return Error(ErrorLoc, "immediate operand must be in the range [1,15]"); + case Match_ImmRange1_16: + return Error(ErrorLoc, "immediate operand must be in the range [1,16]"); + case Match_ImmRange1_31: + return Error(ErrorLoc, "immediate operand must be in the range [1,31]"); + case Match_ImmRange1_32: + return Error(ErrorLoc, "immediate operand must be in the range [1,32]"); + case Match_ImmRange1_64: + return Error(ErrorLoc, "immediate operand must be in the range [1,64]"); + case Match_ImmRange8_8: + return Error(ErrorLoc, "immediate operand must be 8."); + case Match_ImmRange16_16: + return Error(ErrorLoc, "immediate operand must be 16."); + case Match_ImmRange32_32: + return Error(ErrorLoc, "immediate operand must be 32."); + case Match_ImmRange256_65535: + return Error(ErrorLoc, "immediate operand must be in the range [255,65535]"); + case Match_ImmRange0_16777215: + return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]"); case Match_AlignedMemoryRequiresNone: case Match_DupAlignedMemoryRequiresNone: case Match_AlignedMemoryRequires16: @@ -10244,8 +10210,8 @@ static const struct { { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, - { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, - {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, + { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, + {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} }, diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac3d8c7..5ab236b 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,21 +7,24 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" 
+#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> #include <vector> using namespace llvm; @@ -31,6 +34,7 @@ using namespace llvm; typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { + // Handles the condition code status of instructions in IT blocks class ITStatus { @@ -81,9 +85,7 @@ namespace { private: std::vector<unsigned char> ITStates; }; -} -namespace { /// ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: @@ -91,7 +93,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() override {} + ~ARMDisassembler() override = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -106,7 +108,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() override {} + ~ThumbDisassembler() override = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -118,7 +120,8 @@ private: DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(MCInst&) const; }; -} + +} // end anonymous namespace static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -135,7 +138,6 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { llvm_unreachable("Invalid DecodeStatus!"); } - // Forward declare these because the autogenerated code will reference them. // Definitions are further down. static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -319,7 +321,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); - static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, @@ -395,8 +396,9 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); + #include "ARMGenDisassemblerTables.inc" static MCDisassembler *createARMDisassembler(const Target &T, @@ -416,8 +418,7 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &OS, raw_ostream &CS, uint32_t Insn, - DecodeStatus Result) -{ + DecodeStatus Result) { switch (MI.getOpcode()) { case ARM::HVC: { // HVC is undefined if condition = 0xf otherwise upredictable @@ -461,74 +462,39 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result); } - // VFP and NEON instructions, similarly, are shared between ARM - // and Thumb modes. 
- Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; - } - - Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; - } - - Result = - decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. - if (!DecodePredicateOperand(MI, 0xE, Address, this)) - return MCDisassembler::Fail; - return Result; - } - - Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, - this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. - if (!DecodePredicateOperand(MI, 0xE, Address, this)) - return MCDisassembler::Fail; - return Result; - } - - Result = - decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. - if (!DecodePredicateOperand(MI, 0xE, Address, this)) - return MCDisassembler::Fail; - return Result; - } + struct DecodeTable { + const uint8_t *P; + bool DecodePred; + }; - Result = - decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; - } + const DecodeTable Tables[] = { + {DecoderTableVFP32, false}, {DecoderTableVFPV832, false}, + {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true}, + {DecoderTableNEONDup32, true}, {DecoderTablev8NEON32, false}, + {DecoderTablev8Crypto32, false}, + }; - Result = - decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; + for (auto Table : Tables) { + Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (Table.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return Result; + } } - Size = 0; + Size = 4; return MCDisassembler::Fail; } namespace llvm { + extern const MCInstrDesc ARMInsts[]; -} + +} // end namespace llvm /// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the /// immediate Value in the MCInst. 
The immediate Value has had any PC @@ -859,7 +825,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return MCDisassembler::Fail; } - extern "C" void LLVMInitializeARMDisassembler() { TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(), createARMDisassembler); @@ -1056,7 +1021,6 @@ static const uint16_t QPRDecoderTable[] = { ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15 }; - static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31 || (RegNo & 1) != 0) @@ -1676,7 +1640,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - if (type && Rn == 15){ + if (type && Rn == 15) { if (Rt2 == 15) S = MCDisassembler::SoftFail; break; @@ -1693,7 +1657,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::LDRH: case ARM::LDRH_PRE: case ARM::LDRH_POST: - if (type && Rn == 15){ + if (type && Rn == 15) { if (Rt == 15) S = MCDisassembler::SoftFail; break; @@ -1711,7 +1675,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::LDRSB: case ARM::LDRSB_PRE: case ARM::LDRSB_POST: - if (type && Rn == 15){ + if (type && Rn == 15) { if (Rt == 15) S = MCDisassembler::SoftFail; break; @@ -2309,7 +2273,6 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3748,7 +3711,6 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } - static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4073,7 +4035,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder){ + uint64_t Address, const void *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1))); @@ -4081,7 +4043,8 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder){ + uint64_t Address, + const void *Decoder) { // Val is passed in as S:J1:J2:imm10:imm11 // Note no trailing zero after imm11. Also the J1 and J2 values are from // the encoded instruction. 
So here change to I1 and I2 values via: @@ -4247,7 +4210,8 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder){ + uint64_t Address, + const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -4323,7 +4287,6 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4506,7 +4469,6 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4637,7 +4599,6 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4771,7 +4732,6 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -5266,9 +5226,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - DecodeStatus S = MCDisassembler::Success; unsigned CRm = fieldFromInstruction(Val, 0, 4); diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 3667952..57b9136 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -20,7 +20,15 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> + using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -73,7 +81,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, unsigned Opcode = MI->getOpcode(); switch (Opcode) { - // Check for MOVs and print canonical forms, instead. case ARM::MOVsr: { // FIXME: Thumb variants? 
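Editorial sketch for the ARMDisassembler::getInstruction hunk above: it collapses six copy-pasted decode attempts into one loop over {table, needs-predicate} pairs. A minimal standalone rendering of that pattern is below, assuming the autogenerated decodeInstruction and the DecodePredicateOperand helper declared earlier in the file; tryDecodeTables is a hypothetical name introduced here for illustration only, not part of the patch.

    // Sketch only: walk candidate decoder tables; the first successful decode
    // wins. Tables shared with Thumb2 (DecodePred == true) get a fake
    // always-execute (AL, 0xE) predicate operand appended, because the shared
    // instruction definitions are predicable in Thumb2.
    struct DecodeTable {
      const uint8_t *P; // autogenerated decoder table
      bool DecodePred;  // append an ARMCC::AL predicate on success?
    };

    static DecodeStatus
    tryDecodeTables(ArrayRef<DecodeTable> Tables, MCInst &MI, uint32_t Insn,
                    uint64_t Address, const MCDisassembler *Dis,
                    const MCSubtargetInfo &STI, uint64_t &Size) {
      for (const DecodeTable &T : Tables) {
        DecodeStatus R = decodeInstruction(T.P, MI, Insn, Address, Dis, STI);
        if (R == MCDisassembler::Fail)
          continue;               // not in this table; try the next one
        Size = 4;                 // every encoding in these tables is 32-bit
        if (T.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, Dis))
          return MCDisassembler::Fail;
        return R;
      }
      Size = 4; // a failed decode still consumes a full 4-byte word
      return MCDisassembler::Fail;
    }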
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 9d80eed..86873a3 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -235,4 +235,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index a58d5b3..a77df7a 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -31,10 +33,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 0, 20, 0}, {"fixup_t2_movw_lo16", 0, 20, 0}, {"fixup_arm_mod_imm", 0, 12, 0}, + {"fixup_t2_so_imm", 0, 26, 0}, }; const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 12, 20, 0}, {"fixup_t2_movw_lo16", 12, 20, 0}, {"fixup_arm_mod_imm", 20, 12, 0}, + {"fixup_t2_so_imm", 26, 6, 0}, }; if (Kind < FirstTargetFixupKind) @@ -356,14 +358,30 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, return Value; } -unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext *Ctx, - bool IsLittleEndian, - bool IsResolved) const { +unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const { unsigned Kind = Fixup.getKind(); + + // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT + // and .word relocations they put the Thumb bit into the addend if possible. + // Other relocation types don't want this bit though (branches couldn't encode + // it if it *was* present, and no other relocations exist) and it can + // interfere with checking valid expressions. 
+ if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) && + (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 || + Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 || + Kind == ARM::fixup_t2_movt_hi16)) + Value |= 1; + } + switch (Kind) { default: - llvm_unreachable("Unknown fixup kind!"); + Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type"); + return 0; case FK_Data_1: case FK_Data_2: case FK_Data_4: @@ -373,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_SecRel_4: return Value; case ARM::fixup_arm_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { @@ -385,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } case ARM::fixup_t2_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { @@ -412,8 +430,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; isAdd = false; } - if (Ctx && Value >= 4096) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 4096) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value |= isAdd << 23; @@ -433,8 +451,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; opc = 2; // 0b0010 } - if (Ctx && ARM_AM::getSOImmVal(Value) == -1) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (ARM_AM::getSOImmVal(Value) == -1) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } // Encode the immediate and shift the opcode into place. @@ -502,6 +520,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { + // FIXME: We get both thumb1 and thumb2 in here, so we can only check for + // the less strict thumb2 value. + if (!isInt<26>(Value - 4)) { + Ctx.reportError(Fixup.getLoc(), "Relocation out of range"); + return 0; + } + // The value doesn't encode the low bit (always zero) and is offset by // four. The 32-bit immediate value is encoded as // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) @@ -541,8 +566,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. - if (Ctx && Value % 4 != 0) { - Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination"); + if (Value % 4 != 0) { + Ctx.reportError(Fixup.getLoc(), "misaligned ARM call destination"); return 0; } @@ -568,10 +593,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_thumb_cp: // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we // could have an error on our hands. 
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { + if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { - Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); return 0; } } @@ -581,8 +606,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // CB instructions can only branch to offsets in [4, 126] in multiples of 2 // so ensure that the raw value LSB is zero and it lies in [2, 130]. // An offset of 2 will be relaxed to a NOP. - if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } // Offset by 4 and don't encode the lower bit, which is always 0. @@ -592,21 +617,21 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } case ARM::fixup_arm_thumb_br: // Offset by 4 and don't encode the lower bit, which is always 0. - if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && - !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) { + if (!STI->getFeatureBits()[ARM::FeatureThumb2] && + !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { - Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); return 0; } } return ((Value - 4) >> 1) & 0x7ff; case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. - if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + if (!STI->getFeatureBits()[ARM::FeatureThumb2]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { - Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); return 0; } } @@ -620,8 +645,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, isAdd = false; } // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. - if (Ctx && Value >= 256) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 256) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value = (Value & 0xf) | ((Value & 0xf0) << 4); @@ -641,8 +666,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } // These values don't encode the low two bits since they're always zero. Value >>= 2; - if (Ctx && Value >= 256) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 256) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value |= isAdd << 23; @@ -667,13 +692,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, isAdd = false; } // These values don't encode the low bit since it's always zero. 
- if (Ctx && (Value & 1)) { - Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup"); + if (Value & 1) { + Ctx.reportError(Fixup.getLoc(), "invalid value for this fixup"); return 0; } Value >>= 1; - if (Ctx && Value >= 256) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 256) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value |= isAdd << 23; @@ -687,38 +712,38 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } case ARM::fixup_arm_mod_imm: Value = ARM_AM::getSOImmVal(Value); - if (Ctx && Value >> 12) { - Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value"); + if (Value >> 12) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); return 0; } return Value; + case ARM::fixup_t2_so_imm: { + Value = ARM_AM::getT2SOImmVal(Value); + if ((int64_t)Value < 0) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); + return 0; + } + // Value will contain a 12-bit value broken up into a 4-bit shift in bits + // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate + // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit + // 10 of the upper half-word and imm3 is placed at 14:12 of the lower + // half-word. + uint64_t EncValue = 0; + EncValue |= (Value & 0x800) << 15; + EncValue |= (Value & 0x700) << 4; + EncValue |= (Value & 0xff); + return swapHalfWords(EncValue, IsLittleEndian); + } } } -void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { +bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; - // MachO (the only user of "Value") tries to make .o files that look vaguely - // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit - // into the addend if possible. Other relocation types don't want this bit - // though (branches couldn't encode it if it *was* present, and no other - // relocations exist) and it can interfere with checking valid expressions. - if ((unsigned)Fixup.getKind() == FK_Data_4 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) { - if (Sym) { - if (Asm.isThumbFunc(Sym)) - Value |= 1; - } - } - if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { + const unsigned FixupKind = Fixup.getKind() ; + if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { assert(Sym && "How did we resolve this?"); // If the symbol is external the linker will handle it. @@ -726,23 +751,32 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // If the symbol is out of range, produce a relocation and hope the // linker can handle it. GNU AS produces an error in this case. - if (Sym->isExternal() || Value >= 0x400004) - IsResolved = false; + if (Sym->isExternal()) + return true; + } + // Create relocations for unconditional branches to function symbols with + // different execution mode in ELF binaries. 
+ if (Sym && Sym->isELF()) { + unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType(); + if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { + if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) + return true; + if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || + FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_condbranch || + FixupKind == ARM::fixup_t2_uncondbranch)) + return true; + } } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. - if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || - (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) - IsResolved = false; - - // Try to get the encoded value for the fixup as-if we're mapping it into - // the instruction. This allows adjustFixupValue() to issue a diagnostic - // if the value aren't invalid. - (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), - IsLittleEndian, IsResolved); + if (A && (FixupKind == ARM::fixup_arm_thumb_blx || + FixupKind == ARM::fixup_arm_blx || + FixupKind == ARM::fixup_arm_uncondbl || + FixupKind == ARM::fixup_arm_condbl)) + return true; + return false; } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -788,6 +822,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_so_imm: return 4; case FK_SecRel_2: @@ -840,28 +875,31 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: case ARM::fixup_arm_mod_imm: + case ARM::fixup_t2_so_imm: // Instruction size is 4 bytes. return 4; } } -void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { +void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = - adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true); + MCContext &Ctx = Asm.getContext(); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, + IsLittleEndian); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // Used to point to big endian bytes. 
unsigned FullSizeBytes; if (!IsLittleEndian) { FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); - assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 84caaac..0237496 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -38,19 +38,17 @@ public: const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - /// processFixupValue - Target hook to process the literal value of a fixup - /// if necessary. - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, - MCContext *Ctx, bool IsLittleEndian, - bool IsResolved) const; - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; + + unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsResolved) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 09dc017..bd729fa 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H #include "ARMAsmBackend.h" -#include "llvm/Support/MachO.h" +#include "llvm/BinaryFormat/MachO.h" namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 088b420..92e553f 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -291,7 +291,11 @@ namespace ARMII { /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects /// just that part of the flag set. - MO_OPTION_MASK = 0x1f, + MO_OPTION_MASK = 0x0f, + + /// MO_SBREL - On a symbol operand, this represents a static base relative + /// relocation. Used in movw and movt instructions. + MO_SBREL = 0x10, /// MO_DLLIMPORT - On a symbol operand, this represents that the reference /// to the symbol is for an import stub. 
This is used for DLL import diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 6f19754..59f31be 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -7,32 +7,32 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMFixupKinds.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cstdint> using namespace llvm; namespace { + class ARMELFObjectWriter : public MCELFObjectTargetWriter { enum { DefaultEABIVersion = 0x05000000U }; - unsigned GetRelocTypeInner(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const; + unsigned GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, MCContext &Ctx) const; public: ARMELFObjectWriter(uint8_t OSABI); - ~ARMELFObjectWriter() override; + ~ARMELFObjectWriter() override = default; unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -40,15 +40,14 @@ namespace { bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; -} + +} // end anonymous namespace ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, /*HasRelocationAddend*/ false) {} -ARMELFObjectWriter::~ARMELFObjectWriter() {} - bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { // FIXME: This is extremely conservative. 
This really needs to use a @@ -70,19 +69,20 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - return GetRelocTypeInner(Target, Fixup, IsPCRel); + return GetRelocTypeInner(Target, Fixup, IsPCRel, Ctx); } unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const { + bool IsPCRel, + MCContext &Ctx) const { MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Type = 0; if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: - report_fatal_error("unsupported relocation on symbol"); + Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; case FK_Data_4: switch (Modifier) { @@ -161,7 +161,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: - report_fatal_error("unsupported relocation on symbol"); + Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; case FK_Data_1: switch (Modifier) { @@ -270,10 +270,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } break; case ARM::fixup_t2_movt_hi16: - Type = ELF::R_ARM_THM_MOVT_ABS; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_THM_MOVT_ABS; + break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF:: R_ARM_THM_MOVT_BREL; + break; + } break; case ARM::fixup_t2_movw_lo16: - Type = ELF::R_ARM_THM_MOVW_ABS_NC; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_THM_MOVW_ABS_NC; + break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF:: R_ARM_THM_MOVW_BREL_NC; + break; + } break; } } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index f6bb35d..93f4006 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -15,8 +15,13 @@ #include "ARMRegisterInfo.h" #include "ARMUnwindOpAsm.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -24,25 +29,32 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCValue.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/TargetParser.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <cassert> +#include <climits> +#include <cstddef> +#include <cstdint> +#include <string> using namespace llvm; @@ -101,16 +113,21 @@ ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S, bool VerboseAsm) : ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter), IsVerboseAsm(VerboseAsm) {} + void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; } void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; } void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; } + void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) { OS << "\t.personality " << Personality->getName() << '\n'; } + void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) { OS << "\t.personalityindex " << Index << '\n'; } + void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; } + void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { OS << "\t.setfp\t"; @@ -121,6 +138,7 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, OS << ", #" << Offset; OS << '\n'; } + void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { assert((Reg != ARM::SP && Reg != ARM::PC) && "the operand of .movsp cannot be either sp or pc"); @@ -131,9 +149,11 @@ void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { OS << ", #" << Offset; OS << '\n'; } + void ARMTargetAsmStreamer::emitPad(int64_t Offset) { OS << "\t.pad\t#" << Offset << '\n'; } + void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector) { assert(RegList.size() && "RegList should not be empty"); @@ -151,8 +171,9 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, OS << "}\n"; } -void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) { -} + +void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {} + void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value); if (IsVerboseAsm) { @@ -162,6 +183,7 @@ void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { } OS << "\n"; } + void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, StringRef String) { switch (Attribute) { @@ -179,6 +201,7 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, } OS << "\n"; } + void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) { @@ -194,20 +217,25 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, } OS << "\n"; } + void ARMTargetAsmStreamer::emitArch(unsigned Arch) { OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n"; } + void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n"; } + void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n'; } + void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n"; } -void ARMTargetAsmStreamer::finishAttributeSection() { -} + +void ARMTargetAsmStreamer::finishAttributeSection() {} + void ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName(); @@ -274,12 +302,12 @@ private: }; StringRef CurrentVendor; - unsigned FPU; - 
unsigned Arch; - unsigned EmittedArch; + unsigned FPU = ARM::FK_INVALID; + unsigned Arch = ARM::AK_INVALID; + unsigned EmittedArch = ARM::AK_INVALID; SmallVector<AttributeItem, 64> Contents; - MCSection *AttributeSection; + MCSection *AttributeSection = nullptr; AttributeItem *getAttributeItem(unsigned Attribute) { for (size_t i = 0; i < Contents.size(); ++i) @@ -393,9 +421,7 @@ private: public: ARMTargetELFStreamer(MCStreamer &S) - : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID), - Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID), - AttributeSection(nullptr) {} + : ARMTargetStreamer(S), CurrentVendor("aeabi") {} }; /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -416,12 +442,11 @@ public: ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), - MappingSymbolCounter(0), LastEMS(EMS_None) { + : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) { EHReset(); } - ~ARMELFStreamer() {} + ~ARMELFStreamer() override = default; void FinishImpl() override; @@ -439,20 +464,21 @@ public: void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes); void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - + LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo); MCELFStreamer::ChangeSection(Section, Subsection); + auto LastMappingSymbol = LastMappingSymbols.find(Section); + if (LastMappingSymbol != LastMappingSymbols.end()) { + LastEMSInfo = std::move(LastMappingSymbol->second); + return; + } + LastEMSInfo.reset(new ElfMappingSymbolInfo(SMLoc(), nullptr, 0)); } /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - void EmitInstruction(const MCInst& Inst, - const MCSubtargetInfo &STI) override { + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override { if (IsThumb) EmitThumbMappingSymbol(); else @@ -507,15 +533,25 @@ public: MCELFStreamer::EmitBytes(Data); } + void FlushPendingMappingSymbol() { + if (!LastEMSInfo->hasInfo()) + return; + ElfMappingSymbolInfo *EMS = LastEMSInfo.get(); + EmitMappingSymbol("$d", EMS->Loc, EMS->F, EMS->Offset); + EMS->resetInfo(); + } + /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. 
void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override { - if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) + if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) { if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) { getContext().reportError(Loc, "relocated expression must be 32-bit"); return; } + getOrCreateDataFragment(); + } EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size, Loc); @@ -548,22 +584,54 @@ private: EMS_Data }; + struct ElfMappingSymbolInfo { + explicit ElfMappingSymbolInfo(SMLoc Loc, MCFragment *F, uint64_t O) + : Loc(Loc), F(F), Offset(O), State(EMS_None) {} + void resetInfo() { + F = nullptr; + Offset = 0; + } + bool hasInfo() { return F != nullptr; } + SMLoc Loc; + MCFragment *F; + uint64_t Offset; + ElfMappingSymbol State; + }; + void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) return; + if (LastEMSInfo->State == EMS_Data) + return; + else if (LastEMSInfo->State == EMS_None) { + // This is a tentative symbol, it won't really be emitted until it's + // actually needed. + ElfMappingSymbolInfo *EMS = LastEMSInfo.get(); + auto *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + return; + EMS->Loc = SMLoc(); + EMS->F = getCurrentFragment(); + EMS->Offset = DF->getContents().size(); + LastEMSInfo->State = EMS_Data; + return; + } EmitMappingSymbol("$d"); - LastEMS = EMS_Data; + LastEMSInfo->State = EMS_Data; } void EmitThumbMappingSymbol() { - if (LastEMS == EMS_Thumb) return; + if (LastEMSInfo->State == EMS_Thumb) + return; + FlushPendingMappingSymbol(); EmitMappingSymbol("$t"); - LastEMS = EMS_Thumb; + LastEMSInfo->State = EMS_Thumb; } void EmitARMMappingSymbol() { - if (LastEMS == EMS_ARM) return; + if (LastEMSInfo->State == EMS_ARM) + return; + FlushPendingMappingSymbol(); EmitMappingSymbol("$a"); - LastEMS = EMS_ARM; + LastEMSInfo->State = EMS_ARM; } void EmitMappingSymbol(StringRef Name) { @@ -576,6 +644,17 @@ private: Symbol->setExternal(false); } + void EmitMappingSymbol(StringRef Name, SMLoc Loc, MCFragment *F, + uint64_t Offset) { + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." 
+ Twine(MappingSymbolCounter++))); + EmitLabel(Symbol, Loc, F); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); + Symbol->setOffset(Offset); + } + void EmitThumbFunc(MCSymbol *Func) override { getAssembler().setIsThumbFunc(Func); EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction); @@ -599,10 +678,12 @@ private: void EmitFixup(const MCExpr *Expr, MCFixupKind Kind); bool IsThumb; - int64_t MappingSymbolCounter; + int64_t MappingSymbolCounter = 0; + + DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>> + LastMappingSymbols; - DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; - ElfMappingSymbol LastEMS; + std::unique_ptr<ElfMappingSymbolInfo> LastEMSInfo; // ARM Exception Handling Frame Information MCSymbol *ExTab; @@ -618,6 +699,7 @@ private: SmallVector<uint8_t, 64> Opcodes; UnwindOpcodeAssembler UnwindOpAsm; }; + } // end anonymous namespace ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { @@ -627,33 +709,42 @@ ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); } void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); } void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); } + void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) { getStreamer().emitPersonality(Personality); } + void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) { getStreamer().emitPersonalityIndex(Index); } + void ARMTargetELFStreamer::emitHandlerData() { getStreamer().emitHandlerData(); } + void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { getStreamer().emitSetFP(FpReg, SpReg, Offset); } + void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) { getStreamer().emitMovSP(Reg, Offset); } + void ARMTargetELFStreamer::emitPad(int64_t Offset) { getStreamer().emitPad(Offset); } + void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector) { getStreamer().emitRegSave(RegList, isVector); } + void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes) { getStreamer().emitUnwindRaw(Offset, Opcodes); } + void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { assert(!Vendor.empty() && "Vendor cannot be empty."); @@ -668,25 +759,31 @@ void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { CurrentVendor = Vendor; } + void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute, StringRef Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) { setAttributeItems(Attribute, IntValue, StringValue, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitArch(unsigned Value) { Arch = Value; } + void ARMTargetELFStreamer::emitObjectArch(unsigned Value) { EmittedArch = Value; } + void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; @@ -786,9 +883,11 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; } } + void ARMTargetELFStreamer::emitFPU(unsigned Value) { FPU = Value; } + void ARMTargetELFStreamer::emitFPUDefaultAttributes() { switch (FPU) { case ARM::FK_VFP: @@ -920,6 +1019,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { 
break; } } + size_t ARMTargetELFStreamer::calculateContentSize() const { size_t Result = 0; for (size_t i = 0; i < Contents.size(); ++i) { @@ -944,6 +1044,7 @@ size_t ARMTargetELFStreamer::calculateContentSize() const { } return Result; } + void ARMTargetELFStreamer::finishAttributeSection() { // <format-version> // [ <section-length> "vendor-name" @@ -1093,9 +1194,9 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix, const MCSymbolELF *Group = FnSection.getGroup(); if (Group) Flags |= ELF::SHF_GROUP; - MCSectionELF *EHSection = - getContext().getELFSection(EHSecName, Type, Flags, 0, Group, - FnSection.getUniqueID(), nullptr, &FnSection); + MCSectionELF *EHSection = getContext().getELFSection( + EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(), + static_cast<const MCSymbolELF *>(&Fn)); assert(EHSection && "Failed to get the required EH section"); @@ -1114,6 +1215,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, SectionKind::getData(), FnStart); } + void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { MCDataFragment *Frag = getOrCreateDataFragment(); Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr, @@ -1396,8 +1498,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; - } - } - +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 3fe2302..831589b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -15,55 +15,47 @@ namespace llvm { namespace ARM { enum Fixups { - // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol - // addresses + // 12-bit PC relative relocation for symbol addresses fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind, - // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with - // the 16-bit halfwords reordered. + // Equivalent to fixup_arm_ldst_pcrel_12, with the 16-bit halfwords reordered. fixup_t2_ldst_pcrel_12, - // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol - // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. + // 10-bit PC relative relocation for symbol addresses used in + // LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. fixup_arm_pcrel_10_unscaled, - // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses - // used in VFP instructions where the lower 2 bits are not encoded - // (so it's encoded as an 8-bit immediate). + // 10-bit PC relative relocation for symbol addresses used in VFP instructions + // where the lower 2 bits are not encoded (so it's encoded as an 8-bit + // immediate). fixup_arm_pcrel_10, - // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for - // the short-swapped encoding of Thumb2 instructions. + // Equivalent to fixup_arm_pcrel_10, accounting for the short-swapped encoding + // of Thumb2 instructions. fixup_t2_pcrel_10, - // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses - // used in VFP instructions where bit 0 not encoded (so it's encoded as an - // 8-bit immediate). + // 9-bit PC relative relocation for symbol addresses used in VFP instructions + // where bit 0 not encoded (so it's encoded as an 8-bit immediate). 
fixup_arm_pcrel_9, - // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for - // the short-swapped encoding of Thumb2 instructions. + // Equivalent to fixup_arm_pcrel_9, accounting for the short-swapped encoding + // of Thumb2 instructions. fixup_t2_pcrel_9, - // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol - // addresses where the lower 2 bits are not encoded (so it's encoded as an - // 8-bit immediate). + // 10-bit PC relative relocation for symbol addresses where the lower 2 bits + // are not encoded (so it's encoded as an 8-bit immediate). fixup_thumb_adr_pcrel_10, - // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR - // instruction. + // 12-bit PC relative relocation for the ADR instruction. fixup_arm_adr_pcrel_12, - // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR - // instruction. + // 12-bit PC relative relocation for the ADR instruction. fixup_t2_adr_pcrel_12, - // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch - // instructions. + // 24-bit PC relative relocation for conditional branch instructions. fixup_arm_condbranch, - // fixup_arm_uncondbranch - 24-bit PC relative relocation for - // branch instructions. (unconditional) + // 24-bit PC relative relocation for branch instructions. (unconditional) fixup_arm_uncondbranch, - // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct - // uconditional branch instructions. + // 20-bit PC relative relocation for Thumb2 direct unconditional branch + // instructions. fixup_t2_condbranch, - // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct - // branch unconditional branch instructions. + // 20-bit PC relative relocation for Thumb2 direct unconditional branch + // instructions. fixup_t2_uncondbranch, - // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. + // 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, // The following fixups handle the ARM BL instructions. These can be @@ -75,46 +67,48 @@ enum Fixups { // MachO does not draw a distinction between the two cases, so it will treat // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. - // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + // Fixup for unconditional ARM BL instructions. fixup_arm_uncondbl, - // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial - // conditionalisation. + // Fixup for ARM BL instructions with nontrivial conditionalisation. fixup_arm_condbl, - // fixup_arm_blx - Fixup for ARM BLX instructions. + // Fixup for ARM BLX instructions. fixup_arm_blx, - // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. + // Fixup for Thumb BL instructions. fixup_arm_thumb_bl, - // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions. + // Fixup for Thumb BLX instructions. fixup_arm_thumb_blx, - // fixup_arm_thumb_cb - Fixup for Thumb branch instructions. + // Fixup for Thumb branch instructions. fixup_arm_thumb_cb, - // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs. + // Fixup for Thumb load/store from constant pool instrs. fixup_arm_thumb_cp, - // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions. + // Fixup for Thumb conditional branching instructions. 
fixup_arm_thumb_bcc, // The next two are for the movt/movw pair // the 16bit imm field are split into imm{15-12} and imm{11-0} fixup_arm_movt_hi16, // :upper16: fixup_arm_movw_lo16, // :lower16: - fixup_t2_movt_hi16, // :upper16: - fixup_t2_movw_lo16, // :lower16: + fixup_t2_movt_hi16, // :upper16: + fixup_t2_movw_lo16, // :lower16: - // fixup_arm_mod_imm - Fixup for mod_imm + // Fixup for mod_imm fixup_arm_mod_imm, + // Fixup for Thumb2 8-bit rotated operand + fixup_t2_so_imm, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; } -} +} // namespace llvm #endif diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 559a4f8..f1f35f4 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -11,22 +11,33 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> using namespace llvm; @@ -36,9 +47,8 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); namespace { + class ARMMCCodeEmitter : public MCCodeEmitter { - ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete; - void operator=(const ARMMCCodeEmitter &) = delete; const MCInstrInfo &MCII; const MCContext &CTX; bool IsLittleEndian; @@ -47,15 +57,18 @@ public: ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle) : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) { } - - ~ARMMCCodeEmitter() override {} + ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete; + ARMMCCodeEmitter &operator=(const ARMMCCodeEmitter &) = delete; + ~ARMMCCodeEmitter() override = default; bool isThumb(const MCSubtargetInfo &STI) const { return STI.getFeatureBits()[ARM::ModeThumb]; } + bool isThumb2(const MCSubtargetInfo &STI) const { return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2]; } + bool isTargetMachO(const MCSubtargetInfo &STI) const { const Triple &TT = STI.getTargetTriple(); return TT.isOSBinFormatMachO(); @@ -200,6 +213,7 @@ public: case ARM_AM::ib: return 3; } } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. 
/// unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const { @@ -273,7 +287,6 @@ public: unsigned getSOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(Op); // We expect MO to be an immediate or an expression, @@ -326,7 +339,17 @@ public: unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - unsigned SoImm = MI.getOperand(Op).getImm(); + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // Fixups resolve to plain values that need to be encoded. + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + return 0; + } + unsigned SoImm = MO.getImm(); unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); return Encoded; @@ -432,18 +455,6 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx) { - return new ARMMCCodeEmitter(MCII, Ctx, true); -} - -MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx) { - return new ARMMCCodeEmitter(MCII, Ctx, false); -} - /// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing /// instructions, and rewrite them to their Thumb2 form if we are currently in /// Thumb2 mode. @@ -550,7 +561,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, bool ARMMCCodeEmitter:: EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); @@ -1515,7 +1526,7 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, uint32_t v = ~MO.getImm(); uint32_t lsb = countTrailingZeros(v); uint32_t msb = (32 - countLeadingZeros (v)) - 1; - assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!"); + assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!"); return lsb | (msb << 5); } @@ -1700,3 +1711,15 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, } #include "ARMGenMCCodeEmitter.inc" + +MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new ARMMCCodeEmitter(MCII, Ctx, true); +} + +MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new ARMMCCodeEmitter(MCII, Ctx, false); +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 9e4d202..b8a8b1f 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMMCAsmInfo.h" -#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCELFStreamer.h" @@ -260,18 +260,37 @@ public: return false; int64_t Imm = Inst.getOperand(0).getImm(); - // FIXME: This is not right for thumb. 
Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. return true; } }; +class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis { +public: + ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const override { + // We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) + return false; + + int64_t Imm = Inst.getOperand(0).getImm(); + Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes. + return true; + } +}; + } static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { return new ARMMCInstrAnalysis(Info); } +static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) { + return new ThumbMCInstrAnalysis(Info); +} + // Force static initialization. extern "C" void LLVMInitializeARMTargetMC() { for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(), @@ -289,9 +308,6 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterMCSubtargetInfo(*T, ARM_MC::createARMMCSubtargetInfo); - // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis); - TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer); TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer); @@ -313,6 +329,12 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo); } + // Register the MC instruction analyzer. + for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget()}) + TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis); + for (Target *T : {&getTheThumbLETarget(), &getTheThumbBETarget()}) + TargetRegistry::RegisterMCInstrAnalysis(*T, createThumbMCInstrAnalysis); + // Register the MC Code Emitter for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()}) TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 482bcf9..5516a1b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMachORelocationInfo.cpp ----------------------------------------===// +//===- ARMMachORelocationInfo.cpp -----------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" +#include "llvm/MC/MCExpr.h" using namespace llvm; -using namespace object; namespace { + class ARMMachORelocationInfo : public MCRelocationInfo { public: ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} @@ -35,7 +35,8 @@ public: } } }; -} // End unnamed namespace + +} // end anonymous namespace /// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo. 
MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b77181f..4a8139d 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index e49f1a3..4a94318 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -10,20 +10,25 @@ // This file implements the ARMTargetStreamer class. // //===----------------------------------------------------------------------===// -#include "llvm/ADT/MapVector.h" + +#include "ARMTargetMachine.h" #include "llvm/MC/ConstantPools.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; + // // ARMTargetStreamer Implementation // + ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S) : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {} -ARMTargetStreamer::~ARMTargetStreamer() {} +ARMTargetStreamer::~ARMTargetStreamer() = default; // The constant pool handling is shared by all ARMTargetStreamer // implementations.
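The ARMTargetStreamer hunk that follows adds emitTargetAttributes(), deriving the aeabi build attributes (the .ARM.attributes ELF section) directly from subtarget feature bits rather than leaving each client to duplicate the mapping. A plausible caller, sketched under the assumption that ARMTargetStreamer is reachable via llvm/MC/MCStreamer.h as in this tree:

    #include "llvm/MC/MCStreamer.h"
    #include "llvm/MC/MCSubtargetInfo.h"

    // Illustrative driver: emit the hardware-derived attributes, then close
    // the attribute section. emitTargetAttributes() switches to the "aeabi"
    // vendor section itself.
    void emitBuildAttrs(llvm::ARMTargetStreamer &ATS,
                        const llvm::MCSubtargetInfo &STI) {
      ATS.emitTargetAttributes(STI); // Tag_CPU_arch, FPU, DIV_use, ...
      ATS.finishAttributeSection();
    }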
@@ -74,5 +79,180 @@ void ARMTargetStreamer::finishAttributeSection() {} void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} void ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} - void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} + +static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) { + if (STI.getCPU() == "xscale") + return ARMBuildAttrs::v5TEJ; + + if (STI.hasFeature(ARM::HasV8Ops)) { + if (STI.hasFeature(ARM::FeatureRClass)) + return ARMBuildAttrs::v8_R; + return ARMBuildAttrs::v8_A; + } else if (STI.hasFeature(ARM::HasV8MMainlineOps)) + return ARMBuildAttrs::v8_M_Main; + else if (STI.hasFeature(ARM::HasV7Ops)) { + if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP)) + return ARMBuildAttrs::v7E_M; + return ARMBuildAttrs::v7; + } else if (STI.hasFeature(ARM::HasV6T2Ops)) + return ARMBuildAttrs::v6T2; + else if (STI.hasFeature(ARM::HasV8MBaselineOps)) + return ARMBuildAttrs::v8_M_Base; + else if (STI.hasFeature(ARM::HasV6MOps)) + return ARMBuildAttrs::v6S_M; + else if (STI.hasFeature(ARM::HasV6Ops)) + return ARMBuildAttrs::v6; + else if (STI.hasFeature(ARM::HasV5TEOps)) + return ARMBuildAttrs::v5TE; + else if (STI.hasFeature(ARM::HasV5TOps)) + return ARMBuildAttrs::v5T; + else if (STI.hasFeature(ARM::HasV4TOps)) + return ARMBuildAttrs::v4T; + else + return ARMBuildAttrs::v4; +} + +static bool isV8M(const MCSubtargetInfo &STI) { + // Note that v8M Baseline is a subset of v6T2! + return (STI.hasFeature(ARM::HasV8MBaselineOps) && + !STI.hasFeature(ARM::HasV6T2Ops)) || + STI.hasFeature(ARM::HasV8MMainlineOps); +} + +/// Emit the build attributes that only depend on the hardware that we expect +/// to be available, and not on the ABI, or any source-language choices. +void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { + switchVendor("aeabi"); + + const StringRef CPUString = STI.getCPU(); + if (!CPUString.empty() && !CPUString.startswith("generic")) { + // FIXME: remove krait check when GNU tools support krait cpu + if (STI.hasFeature(ARM::ProcKrait)) { + emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); + // We consider krait as a "cortex-a9" + hwdiv CPU + // Enable hwdiv through ".arch_extension idiv" + if (STI.hasFeature(ARM::FeatureHWDivThumb) || + STI.hasFeature(ARM::FeatureHWDivARM)) + emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM); + } else { + emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + } + } + + emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI)); + + if (STI.hasFeature(ARM::FeatureAClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + } else if (STI.hasFeature(ARM::FeatureRClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::RealTimeProfile); + } else if (STI.hasFeature(ARM::FeatureMClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::MicroControllerProfile); + } + + emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM) + ?
ARMBuildAttrs::Not_Allowed + : ARMBuildAttrs::Allowed); + + if (isV8M(STI)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumbDerived); + } else if (STI.hasFeature(ARM::FeatureThumb2)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (STI.hasFeature(ARM::HasV4TOps)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); + } + + if (STI.hasFeature(ARM::FeatureNEON)) { + /* NEON is not exactly a VFP architecture, but GAS emits one of + * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (STI.hasFeature(ARM::FeatureFPARMv8)) { + if (STI.hasFeature(ARM::FeatureCrypto)) + emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); + else + emitFPU(ARM::FK_NEON_FP_ARMV8); + } else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(ARM::FK_NEON_VFPV4); + else + emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16 + : ARM::FK_NEON); + // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture + if (STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + STI.hasFeature(ARM::HasV8_1aOps) + ? ARMBuildAttrs::AllowNeonARMv8_1a + : ARMBuildAttrs::AllowNeonARMv8); + } else { + if (STI.hasFeature(ARM::FeatureFPARMv8)) + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16 + : ARM::FK_FPV5_D16) + : ARM::FK_FP_ARMV8); + else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16 + : ARM::FK_VFPV4_D16) + : ARM::FK_VFPV4); + else if (STI.hasFeature(ARM::FeatureVFP3)) + emitFPU( + STI.hasFeature(ARM::FeatureD16) + // +d16 + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) + ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 + : ARM::FK_VFPV3XD) + : (STI.hasFeature(ARM::FeatureFP16) + ? ARM::FK_VFPV3_D16_FP16 + : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16 + : ARM::FK_VFPV3)); + else if (STI.hasFeature(ARM::FeatureVFP2)) + emitFPU(ARM::FK_VFPV2); + } + + // ABI_HardFP_use attribute to indicate single precision FP. + if (STI.hasFeature(ARM::FeatureVFPOnlySP)) + emitAttribute(ARMBuildAttrs::ABI_HardFP_use, + ARMBuildAttrs::HardFPSinglePrecision); + + if (STI.hasFeature(ARM::FeatureFP16)) + emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + + if (STI.hasFeature(ARM::FeatureMP)) + emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); + + // Hardware divide in ARM mode is part of base arch, starting from ARMv8. + // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). + // It is not possible to produce DisallowDIV: if hwdiv is present in the base + // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. + // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; + // otherwise, the default value (AllowDIVIfExists) applies.
+ if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + + if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI)) + emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureStrictAlign)) + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + else + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureTrustZone) && + STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZVirtualization); + else if (STI.hasFeature(ARM::FeatureTrustZone)) + emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ); + else if (STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowVirtualization); +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 173cc93..d3ab83b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -14,12 +14,14 @@ #include "ARMUnwindOpAsm.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> using namespace llvm; namespace { + /// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes /// with MSB to LSB per uint32_t ordering. For example, the first byte will /// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0, @@ -27,20 +29,19 @@ namespace { class UnwindOpcodeStreamer { private: SmallVectorImpl<uint8_t> &Vec; - size_t Pos; + size_t Pos = 3; public: - UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) { - } + UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V) {} /// Emit the byte in MSB to LSB per uint32_t order. - inline void EmitByte(uint8_t elem) { + void EmitByte(uint8_t elem) { Vec[Pos] = elem; Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u); } /// Emit the size prefix. - inline void EmitSize(size_t Size) { + void EmitSize(size_t Size) { size_t SizeInWords = (Size + 3) / 4; assert(SizeInWords <= 0x100u && "Only 256 additional words are allowed for unwind opcodes"); @@ -48,19 +49,20 @@ namespace { } /// Emit the personality index prefix. - inline void EmitPersonalityIndex(unsigned PI) { + void EmitPersonalityIndex(unsigned PI) { assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX && "Invalid personality prefix"); EmitByte(ARM::EHABI::EHT_COMPACT | PI); } /// Fill the rest of bytes with FINISH opcode. 
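The Pos update in UnwindOpcodeStreamer::EmitByte above looks opaque, but it just walks bytes most-significant-first within each little-endian 32-bit word. A self-contained check of the visit order:

    #include <cstdio>

    int main() {
      unsigned Pos = 3;
      for (int I = 0; I < 8; ++I) {
        std::printf("%u ", Pos);         // prints: 3 2 1 0 7 6 5 4
        Pos = ((Pos ^ 0x3u) + 1) ^ 0x3u; // the same update EmitByte uses
      }
      return 0;
    }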
- inline void FillFinishOpcode() { + void FillFinishOpcode() { while (Pos < Vec.size()) EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH); } }; -} + +} // end anonymous namespace void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { if (RegSave == 0u) @@ -153,7 +155,6 @@ void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex, SmallVectorImpl<uint8_t> &Result) { - UnwindOpcodeStreamer OpStreamer(Result); if (HasPersonality) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index e0c113e..a7bfbdf 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -16,8 +16,8 @@ #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/DataTypes.h" +#include <cstddef> +#include <cstdint> namespace llvm { @@ -25,13 +25,12 @@ class MCSymbol; class UnwindOpcodeAssembler { private: - llvm::SmallVector<uint8_t, 32> Ops; - llvm::SmallVector<unsigned, 8> OpBegins; - bool HasPersonality; + SmallVector<uint8_t, 32> Ops; + SmallVector<unsigned, 8> OpBegins; + bool HasPersonality = false; public: - UnwindOpcodeAssembler() - : HasPersonality(0) { + UnwindOpcodeAssembler() { OpBegins.push_back(0); } @@ -40,12 +39,12 @@ public: Ops.clear(); OpBegins.clear(); OpBegins.push_back(0); - HasPersonality = 0; + HasPersonality = false; } /// Set the personality void setPersonality(const MCSymbol *Per) { - HasPersonality = 1; + HasPersonality = true; } /// Emit unwind opcodes for .save directives @@ -88,6 +87,6 @@ private: } }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 166c04b..f74fb2e 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -9,33 +9,41 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> using namespace llvm; namespace { + class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: ARMWinCOFFObjectWriter(bool Is64Bit) : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { assert(!Is64Bit && "AArch64 support not yet implemented"); } - ~ARMWinCOFFObjectWriter() override {} - unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + ~ARMWinCOFFObjectWriter() override = default; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; bool recordRelocation(const MCFixup &) const override; }; -unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, +} // end anonymous namespace + +unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool 
IsCrossSection, const MCAsmBackend &MAB) const { @@ -79,13 +87,13 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; } -} namespace llvm { + MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); } -} +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 9953c61..5709b4e 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -12,13 +12,35 @@ //===----------------------------------------------------------------------===// #include "Thumb1FrameLowering.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Thumb1InstrInfo.h" +#include "ThumbRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <iterator> +#include <vector> using namespace llvm; @@ -61,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; + Amount = alignTo(Amount, getStackAlignment()); // Replace the pseudo instruction with a new instruction... 
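alignTo(Value, Align) from llvm/Support/MathExtras.h, used above for the outgoing-argument size, is the library form of the old (Amount + Align - 1) / Align * Align round-up. A quick illustration:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void alignToExamples() {
      assert(llvm::alignTo(13, 8) == 16); // rounded up to the next multiple
      assert(llvm::alignTo(16, 8) == 16); // already aligned, unchanged
      assert(llvm::alignTo(0, 8) == 0);
    }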
unsigned Opc = Old.getOpcode(); @@ -215,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R12: if (STI.splitFramePushPop(MF)) break; - // fallthough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: @@ -238,9 +259,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, if (HasFP) { FramePtrOffsetInBlock += MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); if(FramePtrOffsetInBlock) { CFAOffset += FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( @@ -336,14 +359,19 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) - .addReg(ARM::SP)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI.hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); + + // In some cases, virtual registers have been introduced, e.g. by uses of + // emitThumbRegPlusImmInReg. + MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); } static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { @@ -408,13 +436,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(ARM::R4)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); } else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(FramePtr)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(FramePtr) + .add(predOps(ARMCC::AL)); } else { if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { @@ -493,12 +521,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) return true; MachineInstrBuilder MIB = - AddDefaultPred( - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) + .add(predOps(ARMCC::AL)); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef())) - MIB.addOperand(MO); + MIB.add(MO); MIB.addReg(ARM::PC, RegState::Define); // Erase the old instruction (tBX_RET or tPOP). MBB.erase(MBBI); @@ -507,14 +535,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. 
- LivePhysRegs UsedRegs(STI.getRegisterInfo()); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + LivePhysRegs UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantics of pristines changed recently and now // the callee-saved registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved now. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); @@ -533,18 +561,17 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // from the GPR reg class for thumb1. BitVector GPRsNoLRSP = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); - for (int Register = GPRsNoLRSP.find_first(); Register != -1; - Register = GPRsNoLRSP.find_next(Register)) { + for (unsigned Register : GPRsNoLRSP.set_bits()) { if (!UsedRegs.contains(Register)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Register)) { @@ -566,22 +593,23 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(TemporaryReg, RegState::Define) - .addReg(PopReg, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(TemporaryReg, RegState::Define) + .addReg(PopReg, RegState::Kill) + .add(predOps(ARMCC::AL)); } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { // We couldn't use the direct restoration above, so // perform the opposite conversion: tPOP_RET to tPOP. MachineInstrBuilder MIB = - AddDefaultPred( - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)); bool Popped = false; for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && MO.getReg() != ARM::PC) { - MIB.addOperand(MO); + MIB.add(MO); if (!MO.isImplicit()) Popped = true; } @@ -590,23 +618,27 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, MBB.erase(MIB.getInstr()); // Erase the old instruction.
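BitVector::set_bits(), used above to scan for a pop-friendly register, iterates the indices of set bits in ascending order and replaces the manual find_first()/find_next() loop. A minimal sketch:

    #include "llvm/ADT/BitVector.h"

    void setBitsDemo() {
      llvm::BitVector Regs(16);
      Regs.set(1);
      Regs.set(5);
      for (unsigned Idx : Regs.set_bits())
        (void)Idx; // visits 1, then 5; clear bits are skipped
    }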
MBB.erase(MBBI); - MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); + MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) + .add(predOps(ARMCC::AL)); } assert(PopReg && "Do not know how to get LR"); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) .addReg(PopReg, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(PopReg, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill) + .add(predOps(ARMCC::AL)); if (TemporaryReg) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(PopReg, RegState::Define) - .addReg(TemporaryReg, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill) + .add(predOps(ARMCC::AL)); return true; } @@ -666,13 +698,14 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, CopyRegs.insert(ArgReg); // Push the low registers and lr + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!LoRegsToSave.empty()) { - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); - AddDefaultPred(MIB); + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { if (LoRegsToSave.count(Reg)) { - bool isKill = !MF.getRegInfo().isLiveIn(Reg); - if (isKill) + bool isKill = !MRI.isLiveIn(Reg); + if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); MIB.addReg(Reg, getKillRegState(isKill)); @@ -708,22 +741,21 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the PUSH, but don't insert it yet (the MOVs need to come first). - MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)); - AddDefaultPred(PushMIB); + MachineInstrBuilder PushMIB = + BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); SmallVector<unsigned, 4> RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { if (HiRegsToSave.count(*HiRegToSave)) { - bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave); - if (isKill) + bool isKill = !MRI.isLiveIn(*HiRegToSave); + if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)); - MIB.addReg(*CopyReg, RegState::Define); - MIB.addReg(*HiRegToSave, getKillRegState(isKill)); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(*CopyReg, RegState::Define) + .addReg(*HiRegToSave, getKillRegState(isKill)) + .add(predOps(ARMCC::AL)); // Record the register that must be added to the PUSH. RegsToPush.push_back(*CopyReg); @@ -735,7 +767,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, } // Add the low registers to the PUSH, in ascending order. - for (unsigned Reg : reverse(RegsToPush)) + for (unsigned Reg : llvm::reverse(RegsToPush)) PushMIB.addReg(Reg, RegState::Kill); // Insert the PUSH instruction after the MOVs. @@ -817,19 +849,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the POP instruction. 
- MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)); - AddDefaultPred(PopMIB); + MachineInstrBuilder PopMIB = + BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { // Add the low register to the POP. PopMIB.addReg(*CopyReg, RegState::Define); // Create the MOV from low to high register. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)); - MIB.addReg(*HiRegToRestore, RegState::Define); - MIB.addReg(*CopyReg, RegState::Kill); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(*HiRegToRestore, RegState::Define) + .addReg(*CopyReg, RegState::Kill) + .add(predOps(ARMCC::AL)); CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); HiRegToRestore = @@ -837,11 +868,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, } } - - - - MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); - AddDefaultPred(MIB); + MachineInstrBuilder MIB = + BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { @@ -859,6 +887,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // ARMv4T requires BX, see emitEpilogue if (!STI.hasV5TOps()) continue; + // Tailcall optimization failed; change TCRETURN to a tBL + if (MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) { + unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi + ? ARM::tBL : ARM::tBLXr; + MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode)); + BL.add(predOps(ARMCC::AL)); + BL.add(MI->getOperand(0)); + MBB.insert(MI, &*BL); + } Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); if (MI != MBB.end()) diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 4b4fbaa..3a3920a 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -24,8 +24,8 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void Thumb1InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tMOVr); NopInst.addOperand(MCOperand::createReg(ARM::R8)); NopInst.addOperand(MCOperand::createReg(ARM::R8)); @@ -50,20 +50,29 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) || !ARM::tGPRRegClass.contains(DestReg)) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)); else { - // FIXME: The performance consequences of this are going to be atrocious. 
- // Some things to try that should be better: - // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11 - // * 'movs $dst, $src' if cpsr isn't live - // See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html + // FIXME: Can also use 'mov hi, $src; mov $dst, hi', + // with hi as either r10 or r11. + + const TargetRegisterInfo *RegInfo = st.getRegisterInfo(); + if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I) + == MachineBasicBlock::LQR_Dead) { + BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + ->addRegisterDead(ARM::CPSR, RegInfo); + return; + } // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH))) - .addReg(SrcReg, getKillRegState(KillSrc)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP))) - .addReg(DestReg, getDefRegState(true)); + BuildMI(MBB, I, DL, get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(DestReg, getDefRegState(true)); } } @@ -87,9 +96,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::tSTRspi)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } @@ -113,8 +125,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 931914a..e8d9a9c 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -25,8 +25,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
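The copyPhysReg change above makes the pre-v6 register copy cheaper when possible: tMOVSr ('movs') is a 16-bit copy but clobbers the flags, so it is only legal when CPSR is dead at the insertion point; otherwise the push/pop fallback remains. computeRegisterLiveness can also answer LQR_Live or LQR_Unknown, and both must be treated as "flags in use". In sketch form (helper name illustrative):

    #include "llvm/CodeGen/MachineBasicBlock.h"

    // Decide whether a flag-setting copy may be used at insertion point I.
    static bool canUseMOVS(llvm::MachineBasicBlock &MBB,
                           const llvm::TargetRegisterInfo *TRI,
                           llvm::MachineBasicBlock::iterator I,
                           unsigned CPSR) {
      // Only LQR_Dead is safe; LQR_Live and LQR_Unknown both veto the copy.
      return MBB.computeRegisterLiveness(TRI, CPSR, I) ==
             llvm::MachineBasicBlock::LQR_Dead;
    }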
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index d01fc8c..04bdd91 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -9,13 +9,26 @@ #include "ARM.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include <cassert> +#include <new> + using namespace llvm; #define DEBUG_TYPE "thumb2-it" @@ -24,16 +37,18 @@ STATISTIC(NumITs, "Number of IT blocks inserted"); STATISTIC(NumMovedInsts, "Number of predicated instructions moved"); namespace { + class Thumb2ITBlockPass : public MachineFunctionPass { public: static char ID; - Thumb2ITBlockPass() : MachineFunctionPass(ID) {} bool restrictIT; const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; + Thumb2ITBlockPass() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { @@ -52,8 +67,10 @@ namespace { SmallSet<unsigned, 4> &Uses); bool InsertITInstructions(MachineBasicBlock &MBB); }; + char Thumb2ITBlockPass::ID = 0; -} + +} // end anonymous namespace /// TrackDefUses - Tracking what registers are being defined and used by /// instructions in the IT block. This also tracks "dependencies", i.e. 
uses diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 1c731d6..9125be9 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,26 @@ // //===----------------------------------------------------------------------===// -#include "Thumb2InstrInfo.h" -#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> using namespace llvm; @@ -30,10 +40,10 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} + : ARMBaseInstrInfo(STI) {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. 
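// (The rename from getNoopForMachoTarget to getNoop, here and in
// Thumb1InstrInfo above, reflects that padding nops are needed for every
// object format, not only Mach-O. Illustrative use, assuming a TII pointer
// to this instr info:
//   MCInst Nop;
//   TII->getNoop(Nop); // Thumb2: 'hint #0'; Thumb1: 'mov r8, r8'
// The caller then emits Nop wherever alignment padding is required.)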
+void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tHINT); NopInst.addOperand(MCOperand::createImm(0)); NopInst.addOperand(MCOperand::createImm(ARMCC::AL)); @@ -117,8 +127,9 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)); } void Thumb2InstrInfo:: @@ -138,9 +149,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::t2STRi12)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); return; } @@ -156,8 +170,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); - MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); return; } @@ -180,8 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); return; } @@ -198,8 +214,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); - MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); @@ -259,10 +274,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, if (Fits) { if (isSub) { BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) - .addReg(BaseReg) - .addReg(DestReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg) + .addReg(DestReg, RegState::Kill) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); } else { // Here we know that DestReg is not SP but we do not // know anything about BaseReg. t2ADDrr is an invalid @@ -270,10 +286,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, // is fine if SP is the first argument. To be sure we // do not generate invalid encoding, put BaseReg first. 
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) - .addReg(BaseReg) - .addReg(DestReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg) + .addReg(DestReg, RegState::Kill) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); } return; } @@ -284,8 +301,10 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = 0; if (DestReg == ARM::SP && BaseReg != ARM::SP) { // mov sp, rn. Note t2MOVr cannot be used. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg) - .addReg(BaseReg).setMIFlags(MIFlags)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); BaseReg = ARM::SP; continue; } @@ -296,8 +315,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags)); + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg) + .addImm(ThisVal / 4) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); NumBytes = 0; continue; } @@ -334,12 +356,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } // Build the new ADD / SUB. - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill) - .addImm(ThisVal)).setMIFlags(MIFlags); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); if (HasCCOut) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); BaseReg = DestReg; } @@ -474,7 +497,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, do MI.RemoveOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); return true; } @@ -526,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Add cc_out operand if the original instruction did not have one. if (!HasCCOut) MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 15d6330..c834ba7 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -26,8 +26,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
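The mechanical change repeated across these files deserves one summary: the AddDefaultPred/AddDefaultCC wrapper functions are replaced by explicit operands appended through MachineInstrBuilder::add. Roughly, per ARMBaseInstrInfo.h in this patch series, predOps(Pred, PredReg) produces the two predicate operands (a condition-code immediate plus a predicate register, where register 0 means "none") and condCodeOp() produces the optional cc_out register operand. A predicable Thumb instruction is thus built as, for example:

    // Sketch of the new builder idiom; MBB, I, DL, DestReg, SrcReg as usual
    // in a copyPhysReg-style context.
    BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
        .addReg(SrcReg)
        .add(predOps(ARMCC::AL)); // ARMCC::AL + no predicate register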
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 8208e7e..d911dd9 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -10,20 +10,38 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/IR/Function.h" // To access Function attributes +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> #include <utility> + using namespace llvm; #define DEBUG_TYPE "t2-reduce-size" @@ -40,6 +58,7 @@ static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", cl::init(-1), cl::Hidden); namespace { + /// ReduceTable - A static table with information on mapping from wide /// opcodes to narrow struct ReduceEntry { @@ -139,11 +158,12 @@ namespace { class Thumb2SizeReduce : public MachineFunctionPass { public: static char ID; - Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); const Thumb2InstrInfo *TII; const ARMSubtarget *STI; + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); + bool runOnMachineFunction(MachineFunction &MF) override; MachineFunctionProperties getRequiredProperties() const override { @@ -201,19 +221,21 @@ namespace { struct MBBInfo { // The flags leaving this block have high latency. - bool HighLatencyCPSR; + bool HighLatencyCPSR = false; // Has this block been visited yet? - bool Visited; + bool Visited = false; - MBBInfo() : HighLatencyCPSR(false), Visited(false) {} + MBBInfo() = default; }; SmallVector<MBBInfo, 8> BlockInfo; std::function<bool(const Function &)> PredicateFtor; }; + char Thumb2SizeReduce::ID = 0; -} + +} // end anonymous namespace Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -490,14 +512,13 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, isLdStMul = true; break; } - case ARM::t2STMIA: { + case ARM::t2STMIA: // If the base register is killed, we don't care what its value is after the // instruction, so we can use an updating STMIA. 
if (!MI->getOperand(0).isKill()) return false; break; - } case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) @@ -562,8 +583,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); if (!isLdStMul) { - MIB.addOperand(MI->getOperand(0)); - MIB.addOperand(MI->getOperand(1)); + MIB.add(MI->getOperand(0)); + MIB.add(MI->getOperand(1)); if (HasImmOffset) MIB.addImm(OffsetImm / Scale); @@ -577,7 +598,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) - MIB.addOperand(MI->getOperand(OpNum)); + MIB.add(MI->getOperand(OpNum)); // Transfer memoperands. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -621,12 +642,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(ARM::tADDrSPi)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. - AddDefaultPred(MIB); + MachineInstrBuilder MIB = + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(ARM::tADDrSPi)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Imm / 4) // The tADDrSPi has an implied scale by four. + .add(predOps(ARMCC::AL)); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); @@ -652,11 +674,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) { switch (Opc) { default: break; - case ARM::t2ADDSri: { + case ARM::t2ADDSri: if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) return true; LLVM_FALLTHROUGH; - } case ARM::t2ADDSrr: return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } @@ -698,7 +719,6 @@ bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { - if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; @@ -785,13 +805,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); - MIB.addOperand(MI->getOperand(0)); - if (NewMCID.hasOptionalDef()) { - if (HasCC) - AddDefaultT1CC(MIB, CCDead); - else - AddNoT1CC(MIB); - } + MIB.add(MI->getOperand(0)); + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); @@ -800,7 +816,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, continue; if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; - MIB.addOperand(MI->getOperand(i)); + MIB.add(MI->getOperand(i)); } // Transfer MI flags. @@ -880,13 +896,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); - MIB.addOperand(MI->getOperand(0)); - if (NewMCID.hasOptionalDef()) { - if (HasCC) - AddDefaultT1CC(MIB, CCDead); - else - AddNoT1CC(MIB); - } + MIB.add(MI->getOperand(0)); + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); // Transfer the rest of operands. 
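For the narrowing rewrites above, the optional-def handling collapses to one expression: MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()). t1CondCodeOp models the Thumb1 's' bit as an explicit CPSR definition; roughly, per ARMBaseInstrInfo.h in this patch series (signature approximate):

    // MachineOperand t1CondCodeOp(bool isDead = false) {
    //   return MachineOperand::CreateReg(ARM::CPSR, /*isDef=*/true,
    //                                    /*isImp=*/false, /*isKill=*/false,
    //                                    isDead);
    // }
    // CCDead marks the flags dead when no later instruction reads them;
    // condCodeOp() instead adds the empty (register 0) cc_out operand.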
unsigned NumOps = MCID.getNumOperands(); @@ -909,10 +921,10 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Skip implicit def of CPSR. Either it's modeled as an optional // def now or it's already an implicit def on the new instruction. continue; - MIB.addOperand(MO); + MIB.add(MO); } if (!MCID.isPredicable() && NewMCID.isPredicable()) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 2efd63b..15a5675 100644 --- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -93,9 +93,10 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB, unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0) - .setMIFlags(MIFlags); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); } /// emitLoadConstPool - Emits a load from constpool to materialize the @@ -145,14 +146,17 @@ static void emitThumbRegPlusImmInReg( LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg) + .add(t1CondCodeOp()) .addImm(NumBytes) .setMIFlags(MIFlags); } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) { - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg) + .add(t1CondCodeOp()) .addImm(NumBytes) .setMIFlags(MIFlags); - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg) + .add(t1CondCodeOp()) .addReg(LdReg, RegState::Kill) .setMIFlags(MIFlags); } else if (ST.genExecuteOnly()) { @@ -167,12 +171,12 @@ static void emitThumbRegPlusImmInReg( : ((isHigh || !CanChangeCC) ? 
ARM::tADDhirr : ARM::tADDrr); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (Opc != ARM::tADDhirr) - MIB = AddDefaultT1CC(MIB); + MIB = MIB.add(t1CondCodeOp()); if (DestReg == ARM::SP || isSub) MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); else MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize @@ -307,12 +311,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); if (CopyNeedsCC) - MIB = AddDefaultT1CC(MIB); + MIB = MIB.add(t1CondCodeOp()); MIB.addReg(BaseReg, RegState::Kill); if (CopyOpc != ARM::tMOVr) { MIB.addImm(CopyImm); } - AddDefaultPred(MIB.setMIFlags(MIFlags)); + MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL)); BaseReg = DestReg; } @@ -324,10 +328,11 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); if (ExtraNeedsCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(BaseReg).addImm(ExtraImm); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); + MIB = MIB.add(t1CondCodeOp()); + MIB.addReg(BaseReg) + .addImm(ExtraImm) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); } } @@ -460,9 +465,10 @@ bool ThumbRegisterInfo::saveScavengerRegister( // a call clobbered register that we know won't be used in Thumb1 mode. const TargetInstrInfo &TII = *STI.getInstrInfo(); DebugLoc DL; - AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) - .addReg(ARM::R12, RegState::Define) - .addReg(Reg, RegState::Kill)); + BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(Reg, RegState::Kill) + .add(predOps(ARMCC::AL)); // The UseMI is where we would like to restore the register. If there's // interference with R12 before then, however, we'll need to restore it @@ -490,8 +496,10 @@ bool ThumbRegisterInfo::saveScavengerRegister( } } // Restore the register from R12 - AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill)); + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)) + .addReg(Reg, RegState::Define) + .addReg(ARM::R12, RegState::Kill) + .add(predOps(ARMCC::AL)); return true; } @@ -621,5 +629,5 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Add predicate back if it's needed. if (MI.isPredicable()) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); }