author    dim <dim@FreeBSD.org>  2015-12-30 13:13:10 +0000
committer dim <dim@FreeBSD.org>  2015-12-30 13:13:10 +0000
commit    9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a (patch)
tree      b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/Target/ARM
parent    f09a28d1de99fda4f5517fb12670fc36552f4927 (diff)
parent    e194cd6d03d91631334d9d5e55b506036f423cc8 (diff)
Update llvm to trunk r256633.
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.h | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.td | 708
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 119
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h | 9
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 174
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 23
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 17
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.h | 22
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.td | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 44
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h | 5
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 64
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 112
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 202
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.h | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 275
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 2101
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 49
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp | 77
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.td | 73
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrNEON.td | 440
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb.td | 163
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td | 95
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrVFP.td | 55
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 504
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 14
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td | 37
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td | 1046
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 56
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp | 175
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.h | 72
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp | 50
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetMachine.h | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 170
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 33
-rw-r--r--  contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 204
-rw-r--r--  contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 113
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 389
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 7
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 54
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 59
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 99
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 32
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 291
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h | 36
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 47
61 files changed, 4554 insertions, 3872 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 9550a3a..cd7540e 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -35,7 +35,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
-FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index ef609a6..a44dc83 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -17,6 +17,17 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// ARM Helper classes.
+//
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+class Architecture<string fname, string aname, list<SubtargetFeature> features >
+ : SubtargetFeature<fname, "ARMArch", aname,
+ !strconcat(aname, " architecture"), features>;
+
+//===----------------------------------------------------------------------===//
// ARM Subtarget state.
//
@@ -51,8 +62,11 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
"true", "Enable ARMv8 FP",
[FeatureVFP4]>;
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Enable full half-precision floating point",
+ [FeatureFPARMv8]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict VFP3 to 16 double registers">;
+ "Restrict FP to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
@@ -119,9 +133,9 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Has return address stack">;
-/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
-def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
- "Supports v7 DSP instructions in Thumb2">;
+/// DSP extension.
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
+ "Supports DSP instructions in ARM and/or Thumb2">;
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
@@ -150,11 +164,28 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
-// ARM ISAs.
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+ "Reserve R9, making it unavailable as "
+ "GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+ "Don't use movt/movw pairs for 32-bit "
+ "imms">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM ISAs.
+//
+
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
@@ -180,302 +211,444 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
[HasV6T2Ops, FeaturePerfMon]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureVirtualization,
- FeatureMP]>;
+ [HasV7Ops]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
- [HasV8Ops, FeatureAClass, FeatureCRC]>;
+ [HasV8Ops]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions",
+ [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
-// ARM Processors supported.
+// ARM Processor subtarget features.
//
-include "ARMSchedule.td"
-
-// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
- "Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureTrustZone, FeatureMP]>;
+ "Cortex-A5 ARM processors", []>;
def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7",
- "Cortex-A7 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureVFP4, FeatureMP,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureVirtualization]>;
+ "Cortex-A7 ARM processors", []>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
- "Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureTrustZone]>;
+ "Cortex-A8 ARM processors", []>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
- "Cortex-A9 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone]>;
-def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
- "Swift ARM processors",
- [FeatureNEONForFP, FeatureT2XtPk,
- FeatureVFP4, FeatureMP, FeatureHWDiv,
- FeatureHWDivARM, FeatureAvoidPartialCPSR,
- FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx, FeatureTrustZone]>;
+ "Cortex-A9 ARM processors", []>;
def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12",
- "Cortex-A12 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureVFP4,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureVirtualization,
- FeatureTrustZone]>;
-
-
-// FIXME: It has not been determined if A15 has these features.
-def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
- "Cortex-A15 ARM processors",
- [FeatureT2XtPk, FeatureVFP4,
- FeatureMP, FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone, FeatureVirtualization]>;
-
+ "Cortex-A12 ARM processors", []>;
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors", []>;
def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
- "Cortex-A17 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureVFP4,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureVirtualization,
- FeatureTrustZone]>;
-
+ "Cortex-A17 ARM processors", []>;
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors", []>;
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
- "Cortex-A53 ARM processors",
- [FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto, FeatureCRC]>;
-
+ "Cortex-A53 ARM processors", []>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
- "Cortex-A57 ARM processors",
- [FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto, FeatureCRC]>;
+ "Cortex-A57 ARM processors", []>;
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+ "Cortex-A72 ARM processors", []>;
-def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
- "Cortex-R4 ARM processors",
- [FeatureHWDiv,
- FeatureAvoidPartialCPSR,
- FeatureDSPThumb2, FeatureT2XtPk,
- HasV7Ops, FeatureDB, FeatureHasRAS,
- FeatureRClass]>;
+def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
+ "Qualcomm ARM processors", []>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors", []>;
+
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors", []>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
- "Cortex-R5 ARM processors",
- [FeatureSlowFPBrcc,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
-
-// FIXME: krait has currently the same features as A9
-// plus VFP4 and hardware division features.
-def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
- "Qualcomm ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone,
- FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM]>;
+ "Cortex-R5 ARM processors", []>;
+def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
+ "Cortex-R7 ARM processors", []>;
-class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+//===----------------------------------------------------------------------===//
+// ARM schedules.
+//
+
+include "ARMSchedule.td"
+
+
+//===----------------------------------------------------------------------===//
+// ARM architectures
+//
+
+def ARMv2 : Architecture<"armv2", "ARMv2", []>;
+
+def ARMv2a : Architecture<"armv2a", "ARMv2a", []>;
+
+def ARMv3 : Architecture<"armv3", "ARMv3", []>;
+
+def ARMv3m : Architecture<"armv3m", "ARMv3m", []>;
+
+def ARMv4 : Architecture<"armv4", "ARMv4", []>;
+
+def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>;
+
+def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>;
+
+def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>;
+
+def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
+
+def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
+
+def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
+ FeatureDSP]>;
+
+def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
+
+def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps,
+ FeatureTrustZone]>;
+
+def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureAClass]>;
+
+def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
+ FeatureDB,
+ FeatureDSP,
+ FeatureHWDiv,
+ FeatureRClass]>;
+
+def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass]>;
+
+def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass,
+ FeatureDSP,
+ FeatureT2XtPk]>;
+
+def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+// Aliases
+def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>;
+def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>;
+def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>;
+def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
+def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
+def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM processors
+//
+
+// Dummy CPU, used to target architectures
+def : ProcNoItin<"generic", []>;
+
+def : ProcNoItin<"arm8", [ARMv4]>;
+def : ProcNoItin<"arm810", [ARMv4]>;
+def : ProcNoItin<"strongarm", [ARMv4]>;
+def : ProcNoItin<"strongarm110", [ARMv4]>;
+def : ProcNoItin<"strongarm1100", [ARMv4]>;
+def : ProcNoItin<"strongarm1110", [ARMv4]>;
+
+def : ProcNoItin<"arm7tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>;
+def : ProcNoItin<"arm710t", [ARMv4t]>;
+def : ProcNoItin<"arm720t", [ARMv4t]>;
+def : ProcNoItin<"arm9", [ARMv4t]>;
+def : ProcNoItin<"arm9tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm920", [ARMv4t]>;
+def : ProcNoItin<"arm920t", [ARMv4t]>;
+def : ProcNoItin<"arm922t", [ARMv4t]>;
+def : ProcNoItin<"arm940t", [ARMv4t]>;
+def : ProcNoItin<"ep9312", [ARMv4t]>;
+
+def : ProcNoItin<"arm10tdmi", [ARMv5t]>;
+def : ProcNoItin<"arm1020t", [ARMv5t]>;
+
+def : ProcNoItin<"arm9e", [ARMv5te]>;
+def : ProcNoItin<"arm926ej-s", [ARMv5te]>;
+def : ProcNoItin<"arm946e-s", [ARMv5te]>;
+def : ProcNoItin<"arm966e-s", [ARMv5te]>;
+def : ProcNoItin<"arm968e-s", [ARMv5te]>;
+def : ProcNoItin<"arm10e", [ARMv5te]>;
+def : ProcNoItin<"arm1020e", [ARMv5te]>;
+def : ProcNoItin<"arm1022e", [ARMv5te]>;
+def : ProcNoItin<"xscale", [ARMv5te]>;
+def : ProcNoItin<"iwmmxt", [ARMv5te]>;
+
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>;
+
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
-// V4 Processors.
-def : ProcNoItin<"generic", []>;
-def : ProcNoItin<"arm8", []>;
-def : ProcNoItin<"arm810", []>;
-def : ProcNoItin<"strongarm", []>;
-def : ProcNoItin<"strongarm110", []>;
-def : ProcNoItin<"strongarm1100", []>;
-def : ProcNoItin<"strongarm1110", []>;
-
-// V4T Processors.
-def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
-def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
-def : ProcNoItin<"arm710t", [HasV4TOps]>;
-def : ProcNoItin<"arm720t", [HasV4TOps]>;
-def : ProcNoItin<"arm9", [HasV4TOps]>;
-def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
-def : ProcNoItin<"arm920", [HasV4TOps]>;
-def : ProcNoItin<"arm920t", [HasV4TOps]>;
-def : ProcNoItin<"arm922t", [HasV4TOps]>;
-def : ProcNoItin<"arm940t", [HasV4TOps]>;
-def : ProcNoItin<"ep9312", [HasV4TOps]>;
-
-// V5T Processors.
-def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
-def : ProcNoItin<"arm1020t", [HasV5TOps]>;
-
-// V5TE Processors.
-def : ProcNoItin<"arm9e", [HasV5TEOps]>;
-def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm10e", [HasV5TEOps]>;
-def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
-def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
-def : ProcNoItin<"xscale", [HasV5TEOps]>;
-def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
-
-// V6 Processors.
-def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-
-// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-
-// V6K Processors.
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-
-// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
- FeatureDSPThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx,
- FeatureDSPThumb2]>;
-
-// V7a Processors.
// FIXME: A5 has currently the same Schedule model as A8
-def : ProcessorModel<"cortex-a5", CortexA8Model,
- [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureVFP4, FeatureDSPThumb2,
- FeatureHasRAS, FeatureAClass]>;
-def : ProcessorModel<"cortex-a7", CortexA8Model,
- [ProcA7, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
-def : ProcessorModel<"cortex-a8", CortexA8Model,
- [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
-def : ProcessorModel<"cortex-a9", CortexA9Model,
- [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS, FeatureMP,
- FeatureAClass]>;
+def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4]>;
+
+def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureVirtualization]>;
+
+def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+
+def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureMP]>;
// FIXME: A12 has currently the same Schedule model as A9
-def : ProcessorModel<"cortex-a12", CortexA9Model,
- [ProcA12, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
-
-// FIXME: A15 has currently the same ProcessorModel as A9.
-def : ProcessorModel<"cortex-a15", CortexA9Model,
- [ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization,
+ FeatureMP]>;
+
+// FIXME: A15 has currently the same Schedule model as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
// FIXME: A17 has currently the same Schedule model as A9
-def : ProcessorModel<"cortex-a17", CortexA9Model,
- [ProcA17, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
+def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
// FIXME: krait has currently the same Schedule model as A9
-def : ProcessorModel<"krait", CortexA9Model,
- [ProcKrait, HasV7Ops,
- FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+// FIXME: krait has currently the same features as A9 plus VFP4 and hardware
+// division features.
+def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
+ FeatureHasRAS,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM]>;
+
+def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx]>;
// FIXME: R4 has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r4", CortexA8Model,
- [ProcR4]>;
+def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R4F has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r4f", CortexA8Model,
- [ProcR4,
- FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVFP3, FeatureD16]>;
+def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r5", CortexA8Model,
- [ProcR5, HasV7Ops, FeatureDB,
- FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS,
- FeatureD16, FeatureRClass]>;
+def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
-def : ProcessorModel<"cortex-r7", CortexA8Model,
- [ProcR5, HasV7Ops, FeatureDB,
- FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS, FeatureVFPOnlySP,
- FeatureD16, FeatureMP, FeatureRClass]>;
-
-// V7M Processors.
-def : ProcNoItin<"cortex-m3", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureMClass]>;
-def : ProcNoItin<"sc300", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureMClass]>;
-
-// V7EM Processors.
-def : ProcNoItin<"cortex-m4", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP4,
- FeatureVFPOnlySP, FeatureD16,
- FeatureMClass]>;
-def : ProcNoItin<"cortex-m7", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureFPARMv8,
- FeatureD16, FeatureMClass]>;
-
-
-// Swift uArch Processors.
-def : ProcessorModel<"swift", SwiftModel,
- [ProcSwift, HasV7Ops, FeatureNEON,
- FeatureDB, FeatureDSPThumb2,
- FeatureHasRAS, FeatureAClass]>;
-
-// V8 Processors
-def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
-def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
-// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
-def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
+def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureVFPOnlySP,
+ FeatureD16,
+ FeatureFP16,
+ FeatureMP,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+def : ProcNoItin<"cortex-m3", [ARMv7m]>;
+def : ProcNoItin<"sc300", [ARMv7m]>;
+
+def : ProcNoItin<"cortex-m4", [ARMv7em,
+ FeatureVFP4,
+ FeatureVFPOnlySP,
+ FeatureD16]>;
+
+def : ProcNoItin<"cortex-m7", [ARMv7em,
+ FeatureFPARMv8,
+ FeatureD16]>;
+
+
+def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
// Cyclone is very similar to swift
-def : ProcessorModel<"cyclone", SwiftModel,
- [ProcSwift, HasV8Ops, HasV7Ops,
- FeatureCrypto, FeatureFPARMv8,
- FeatureDB,FeatureDSPThumb2,
- FeatureHasRAS, FeatureZCZeroing]>;
+def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx,
+ FeatureCrypto,
+ FeatureZCZeroing]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
@@ -504,8 +677,15 @@ def ARMAsmWriter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def ARMAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "ARM";
+ string BreakCharacters = ".";
+}
+
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
let AssemblyWriters = [ARMAsmWriter];
+ let AssemblyParserVariants = [ARMAsmParserVariant];
}
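
In the reorganised ARM.td above, an Architecture def is itself just a SubtargetFeature whose implied-feature list carries everything an -march name enables, so each processor reduces to an architecture plus CPU-specific tuning features. On the C++ side those definitions surface as plain boolean flags on the subtarget. A minimal sketch of that mapping, assuming only the naming conventions visible above (the real members are generated by TableGen into ARMGenSubtargetInfo.inc and declared in ARMSubtarget.h):

#include <string>

// Sketch only: SubtargetFeature<"dsp", "HasDSP", "true", ...> becomes a
// bool member that generated code sets whenever "+dsp" (or an architecture
// implying it, such as ARMv7a above) appears in the feature string.
struct SubtargetSketch {
  bool HasV7Ops = false; // HasV7Ops
  bool HasDSP = false;   // FeatureDSP

  void applyFeature(const std::string &Bit) {
    if (Bit == "+v7")
      HasV7Ops = true;
    if (Bit == "+dsp")
      HasDSP = true;
  }

  // Accessors in this style are what backend code queries, e.g. the
  // Subtarget->hasDSP() call in the ARMAsmPrinter hunk below.
  bool hasDSP() const { return HasDSP; }
};
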
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 738dded..206db96 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -60,7 +60,7 @@ using namespace llvm;
ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
- InConstantPool(false) {}
+ InConstantPool(false), OptimizationGoals(-1) {}
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
@@ -80,8 +80,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer->EmitLabel(CurrentFnSym);
}
-void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<ARMSubtarget>();
SetupMachineFunction(MF);
+ const Function* F = MF.getFunction();
+ const TargetMachine& TM = MF.getTarget();
+
+ // Calculate this function's optimization goal.
+ unsigned OptimizationGoal;
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ // For best debugging illusion, speed and small size sacrificed
+ OptimizationGoal = 6;
+ else if (F->optForMinSize())
+ // Aggressively for small size, speed and debug illusion sacrificed
+ OptimizationGoal = 4;
+ else if (F->optForSize())
+ // For small size, but speed and debugging illusion preserved
+ OptimizationGoal = 3;
+ else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ // Aggressively for speed, small size and debug illusion sacrificed
+ OptimizationGoal = 2;
+ else if (TM.getOptLevel() > CodeGenOpt::None)
+ // For speed, but small size and good debug illusion preserved
+ OptimizationGoal = 1;
+ else // TM.getOptLevel() == CodeGenOpt::None
+ // For good debugging, but speed and small size preserved
+ OptimizationGoal = 5;
+
+ // Combine a new optimization goal with existing ones.
+ if (OptimizationGoals == -1) // uninitialized goals
+ OptimizationGoals = OptimizationGoal;
+ else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals
+ OptimizationGoals = 0;
if (Subtarget->isTargetCOFF()) {
- bool Internal = MF.getFunction()->hasInternalLinkage();
+ bool Internal = F->hasInternalLinkage();
COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
@@ -198,22 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel(unsigned uid) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
SmallString<60> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid;
return OutContext.getOrCreateSymbol(Name);
}
-
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getDataLayout();
- SmallString<60> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
- << getFunctionNumber();
- return OutContext.getOrCreateSymbol(Name);
-}
-
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
@@ -515,6 +535,17 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+
+ // The last attribute to be emitted is ABI_optimization_goals
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ if (OptimizationGoals > 0 &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI()))
+ ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
+ OptimizationGoals = -1;
+
+ ATS.finishAttributeSection();
}
//===----------------------------------------------------------------------===//
@@ -532,7 +563,7 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
if (Subtarget->hasV8Ops())
return ARMBuildAttrs::v8;
else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+ if (Subtarget->isMClass() && Subtarget->hasDSP())
return ARMBuildAttrs::v7E_M;
return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
@@ -587,7 +618,7 @@ void ARMAsmPrinter::emitAttributes() {
// We consider krait as a "cortex-a9" + hwdiv CPU
// Enable hwdiv through ".arch_extension idiv"
if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV);
+ ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
} else
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
}
@@ -807,8 +838,6 @@ void ARMAsmPrinter::emitAttributes() {
else if (STI.hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowVirtualization);
-
- ATS.finishAttributeSection();
}
//===----------------------------------------------------------------------===//
@@ -828,8 +857,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF;
case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF;
- case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT;
- case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF;
+ case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL;
}
llvm_unreachable("Invalid ARMCPModifier!");
}
@@ -875,8 +903,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getDataLayout();
- int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout &DL = getDataLayout();
+ int Size = DL.getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -909,10 +937,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutContext);
if (ACPV->getPCAdjustment()) {
- MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- ACPV->getLabelId(),
- OutContext);
+ MCSymbol *PCLabel =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ ACPV->getLabelId(), OutContext);
const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
PCRelExpr =
MCBinaryExpr::createAdd(PCRelExpr,
@@ -1136,6 +1163,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Offset = 0;
break;
case ARM::ADDri:
+ case ARM::t2ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
@@ -1198,7 +1226,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1355,9 +1383,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
- MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- MI->getOperand(2).getImm(), OutContext);
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
@@ -1388,9 +1416,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
- MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- MI->getOperand(3).getImm(), OutContext);
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
@@ -1414,10 +1442,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the add.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
@@ -1436,10 +1463,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the add.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
@@ -1468,10 +1494,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// a PC-relative address at the ldr instruction.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the load
unsigned Opcode;
@@ -1519,7 +1544,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MCPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(MCPE.Val.ConstVal);
+ EmitGlobalConstant(DL, MCPE.Val.ConstVal);
return;
}
case ARM::JUMPTABLE_ADDRS:
@@ -1653,12 +1678,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// adds $val, #7
// str $val, [$src, #4]
// movs r0, #0
- // b 1f
+ // b LSJLJEH
// movs r0, #1
- // 1:
+ // LSJLJEH:
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- MCSymbol *Label = GetARMSJLJEHLabel();
+ MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true);
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ValReg)
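
The runOnMachineFunction hunk above computes a per-function value of the AEABI Tag_ABI_optimization_goals attribute (1 through 6, per the inline comments) and folds every function into the single module-wide OptimizationGoals field that EmitEndOfAsmFile consumes. A minimal free-standing sketch of that fold, keeping the same sentinel encoding (the helper is illustrative only; the commit keeps this state inline in ARMAsmPrinter):

// -1 = no function seen yet, 0 = functions disagree, 1..6 = agreed goal.
int combineOptimizationGoals(int ModuleGoals, unsigned FunctionGoal) {
  if (ModuleGoals == -1)                // first function initializes
    return FunctionGoal;
  if (ModuleGoals != (int)FunctionGoal) // any mismatch poisons the tag
    return 0;
  return ModuleGoals;                   // still unanimous
}

EmitEndOfAsmFile then emits the attribute only when the folded value is positive and the target is AEABI or GNU AEABI, so a conflicted or never-initialized module simply omits the tag.
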
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 3d25121..ed7be2d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// labels used for ARMv4t thumb code to make register indirect calls.
SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
+ /// OptimizationGoals - Maintain a combined optimization goal for all
+ /// functions in a module: one of Tag_ABI_optimization_goals values,
+ /// -1 if uninitialized, 0 if conflicting goals
+ int OptimizationGoals;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -84,7 +89,7 @@ public:
void EmitFunctionEntryLabel() override;
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
- void EmitXXStructor(const Constant *CV) override;
+ void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -119,8 +124,6 @@ private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const;
- MCSymbol *GetARMSJLJEHLabel() const;
-
MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags);
public:
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9f43e73..49f3288 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -97,7 +97,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
Subtarget(STI) {
for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
- assert(false && "Duplicated entries?");
+ llvm_unreachable("Duplicated entries?");
MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
}
@@ -440,7 +440,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
if (MI->isBundle()) {
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
int PIdx = I->findFirstPredOperandIdx();
@@ -518,7 +518,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
static bool isCPSRDefined(const MachineInstr *MI) {
for (const auto &MO : MI->operands())
- if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
+ if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
@@ -647,7 +647,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
unsigned Size = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
@@ -853,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
switch (RC->getSize()) {
case 4:
@@ -1043,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
switch (RC->getSize()) {
case 4:
@@ -1224,6 +1219,60 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
+/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
+/// depending on whether the result is used.
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+ bool isThumb1 = Subtarget.isThumb1Only();
+ bool isThumb2 = Subtarget.isThumb2();
+ const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineInstr *MI = MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ MachineBasicBlock *BB = MI->getParent();
+
+ MachineInstrBuilder LDM, STM;
+ if (isThumb1 || !MI->getOperand(1).isDead()) {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
+ : isThumb1 ? ARM::tLDMIA_UPD
+ : ARM::LDMIA_UPD))
+ .addOperand(MI->getOperand(1));
+ } else {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
+ }
+
+ if (isThumb1 || !MI->getOperand(0).isDead()) {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
+ : isThumb1 ? ARM::tSTMIA_UPD
+ : ARM::STMIA_UPD))
+ .addOperand(MI->getOperand(0));
+ } else {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
+ }
+
+ AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
+ AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+
+ // Sort the scratch registers into ascending order.
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ llvm::SmallVector<unsigned, 6> ScratchRegs;
+ for(unsigned I = 5; I < MI->getNumOperands(); ++I)
+ ScratchRegs.push_back(MI->getOperand(I).getReg());
+ std::sort(ScratchRegs.begin(), ScratchRegs.end(),
+ [&TRI](const unsigned &Reg1,
+ const unsigned &Reg2) -> bool {
+ return TRI.getEncodingValue(Reg1) <
+ TRI.getEncodingValue(Reg2);
+ });
+
+ for (const auto &Reg : ScratchRegs) {
+ LDM.addReg(Reg, RegState::Define);
+ STM.addReg(Reg, RegState::Kill);
+ }
+
+ BB->erase(MBBI);
+}
+
+
bool
ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
@@ -1237,6 +1286,11 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return true;
}
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ expandMEMCPY(MI);
+ return true;
+ }
+
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
@@ -1325,9 +1379,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
- NewCPV = ARMConstantPoolConstant::
- Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
+ NewCPV = ARMConstantPoolConstant::Create(
+ cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
+ 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
else if (ACPV->isExtSymbol())
NewCPV = ARMConstantPoolSymbol::
Create(MF.getFunction()->getContext(),
@@ -1645,16 +1699,14 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
if (!NumCycles)
return false;
// If we are optimizing for size, see if the branch in the predecessor can be
// lowered to cbn?z by the constant island lowering pass, and return false if
// so. This results in a shorter instruction sequence.
- const Function *F = MBB.getParent()->getFunction();
- if (F->hasFnAttribute(Attribute::OptimizeForSize) ||
- F->hasFnAttribute(Attribute::MinSize)) {
+ if (MBB.getParent()->getFunction()->optForSize()) {
MachineBasicBlock *Pred = *MBB.pred_begin();
if (!Pred->empty()) {
MachineInstr *LastMI = &*Pred->rbegin();
@@ -1677,12 +1729,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
// Attempt to estimate the relative costs of predication versus branching.
- unsigned UnpredCost = Probability.getNumerator() * NumCycles;
- UnpredCost /= Probability.getDenominator();
- UnpredCost += 1; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() / 10;
-
- return (NumCycles + ExtraPredCycles) <= UnpredCost;
+ // Here we scale up each component of UnpredCost to avoid precision issues when
+ // scaling NumCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
+ UnpredCost += ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+ return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
}
bool ARMBaseInstrInfo::
@@ -1690,23 +1744,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned TCycles, unsigned TExtra,
MachineBasicBlock &FMBB,
unsigned FCycles, unsigned FExtra,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
if (!TCycles || !FCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- unsigned TUnpredCost = Probability.getNumerator() * TCycles;
- TUnpredCost /= Probability.getDenominator();
-
- uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
- unsigned FUnpredCost = Comp * FCycles;
- FUnpredCost /= Probability.getDenominator();
-
+ // Here we scale up each component of UnpredCost to avoid precision issues when
+ // scaling TCycles/FCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
+ Probability.getCompl().scale(FCycles * ScalingUpFactor);
unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
- return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+ return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
}
bool
@@ -1744,9 +1797,10 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
llvm_unreachable("Unknown unconditional branch opcode!");
}
-/// commuteInstruction - Handle commutable instructions.
-MachineInstr *
-ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
switch (MI->getOpcode()) {
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
@@ -1756,7 +1810,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return nullptr;
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
if (!MI)
return nullptr;
// After swapping the MOVCC operands, also invert the condition.
@@ -1765,7 +1819,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
}
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1975,21 +2029,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
- MachineInstr *MI) {
- for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
- Subreg.isValid(); ++Subreg)
- if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
- MachineBasicBlock::LQR_Dead)
- return true;
- return false;
-}
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ if (!MF.getFunction()->optForMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2058,11 +2103,9 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// registers live within the function we might clobber a return value
// register; the other way a register can be live here is if it's
// callee-saved.
- // TODO: Currently, computeRegisterLiveness() does not report "live" if a
- // sub reg is live. When computeRegisterLiveness() works for sub reg, it
- // can replace isAnySubRegLive().
if (isCalleeSavedRegister(CurReg, CSRegs) ||
- isAnySubRegLive(CurReg, TRI, MI)) {
+ MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
// VFP pops don't allow holes in the register list, so any skip is fatal
// for our transformation. GPR pops do, so we should just keep looking.
if (IsVFPPushPop)
@@ -3381,7 +3424,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
assert(Idx != -1 && "Cannot find bundled definition!");
DefIdx = Idx;
- return II;
+ return &*II;
}
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
@@ -3389,7 +3432,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
unsigned &UseIdx, unsigned &Dist) {
Dist = 0;
- MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ MachineBasicBlock::const_instr_iterator II = ++MI->getIterator();
assert(II->isInsideBundle() && "Empty bundle?");
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -3410,7 +3453,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
}
UseIdx = Idx;
- return II;
+ return &*II;
}
/// Return the number of cycles to add to (or subtract from) the static
@@ -3652,6 +3695,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
+ // FIXME: Use Function::optForSize().
if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
@@ -3931,11 +3975,11 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
unsigned Latency = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
if (I->getOpcode() != ARM::t2IT)
- Latency += getInstrLatency(ItinData, I, PredCost);
+ Latency += getInstrLatency(ItinData, &*I, PredCost);
}
return Latency;
}
@@ -4054,8 +4098,8 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->
- getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
MIB.addMemOperand(MMO);
AddDefaultPred(MIB);
}
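
The isProfitableToIfCvt hunks above replace hand-rolled numerator/denominator arithmetic with BranchProbability::scale() and a fixed factor of 1024, so the cycle comparison happens in fixed point rather than losing the fractional part to integer division. A sketch of the single-block case with a stand-in probability type (assumed behaviour: scale(N) computes N * Num / Denom, which is what the old code did explicitly):

#include <cstdint>

struct ProbSketch { // stand-in for llvm::BranchProbability
  uint32_t Num, Denom;
  uint64_t scale(uint64_t N) const { return N * Num / Denom; }
};

bool profitableToPredicate(unsigned NumCycles, unsigned ExtraPredCycles,
                           unsigned MispredictPenalty, ProbSketch Prob) {
  const unsigned ScalingUpFactor = 1024; // keep fractions of a cycle
  uint64_t UnpredCost = Prob.scale(NumCycles * ScalingUpFactor);
  UnpredCost += ScalingUpFactor;                          // the branch itself
  UnpredCost += MispredictPenalty * ScalingUpFactor / 10; // amortized penalty
  return uint64_t(NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
}

For example, with a branch probability of 1/3 and NumCycles = 2, the old code computed 1 * 2 / 3 = 0 unpredicated cycles, while the scaled form keeps 2048 / 3 = 682 units, so marginal candidates are no longer rounded down to free.
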
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b4706e3..d80c494 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -86,6 +86,18 @@ protected:
RegSubRegPair &BaseReg,
RegSubRegPairAndIdx &InsertedReg) const override;
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+ /// Do not call this method for a non-commutable instruction or for a
+ /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; in such cases a null pointer is returned.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -188,9 +200,6 @@ public:
MachineInstr *duplicate(MachineInstr *Orig,
MachineFunction &MF) const override;
- MachineInstr *commuteInstruction(MachineInstr*,
- bool=false) const override;
-
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const;
@@ -224,15 +233,15 @@ public:
bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
unsigned ExtraT, MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- const BranchProbability &Probability) const override {
+ BranchProbability Probability) const override {
return NumCycles == 1;
}
@@ -343,6 +352,8 @@ private:
virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const = 0;
+ void expandMEMCPY(MachineBasicBlock::iterator) const;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e7d5be77..419717c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -225,7 +225,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const {
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
@@ -338,7 +339,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ if (!TargetRegisterInfo::canRealignStack(MF))
return false;
if (AFI->isThumb1OnlyFunction())
return false;
@@ -356,18 +357,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
-needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
-bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index fdc1ef9..cea8b80 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -94,7 +94,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
@@ -126,15 +126,15 @@ public:
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const override;
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const override;
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index d687568..a731d00 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
-static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
- ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
-static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
-static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
@@ -199,9 +199,11 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+ auto &DL = State.getMachineFunction().getDataLayout();
+ unsigned StackAlign = DL.getStackAlignment();
+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
- ArrayRef<uint16_t> RegList;
+ ArrayRef<MCPhysReg> RegList;
switch (LocVT.SimpleTy) {
case MVT::i32: {
RegList = RRegList;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 27cf06b..2335164 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[v2f64], CCIfAlign<"16",
+ CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f4ec8c6..e89757c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -342,7 +342,7 @@ void ARMConstantIslands::verify() {
#ifndef NDEBUG
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
+ MachineBasicBlock *MBB = &*MBBI;
unsigned MBBId = MBB->getNumber();
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
@@ -542,7 +542,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -589,6 +589,8 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
auto MI = MBB.getLastNonDebugInstr();
+ if (MI == MBB.end())
+ continue;
unsigned JTOpcode;
switch (MI->getOpcode()) {
@@ -639,12 +641,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI = MBB->getIterator();
// Can't fall off end of function.
if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = std::next(MBBI);
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
return false;
@@ -722,15 +724,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(&MBB);
// The known bits of the entry block offset are determined by the function
// alignment.
BBInfo.front().KnownBits = MF->getAlignment();
// Compute block offsets and known bits.
- adjustBBOffsetsAfter(MF->begin());
+ adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -968,7 +970,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
@@ -1088,7 +1090,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
unsigned NextBlockOffset, NextBlockAlignment;
- MachineFunction::const_iterator NextBlock = Water;
+ MachineFunction::const_iterator NextBlock = Water->getIterator();
if (++NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
NextBlockAlignment = 0;
@@ -1350,7 +1352,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = std::next(MachineFunction::iterator(UserMBB));
+ NewMBB = &*++UserMBB->getIterator();
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
@@ -1503,8 +1505,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
NewWaterList.insert(NewIsland);
// The new CPE goes before the following block (NewMBB).
- NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+ NewMBB = &*++WaterBB->getIterator();
} else {
// No water found.
DEBUG(dbgs() << "No water found\n");
@@ -1515,7 +1516,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1532,7 +1533,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF->insert(NewMBB, NewIsland);
+ MF->insert(NewMBB->getIterator(), NewIsland);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
@@ -1553,7 +1554,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1732,7 +1733,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
@@ -2058,9 +2059,9 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
/// \brief Returns whether CPEMI is the first instruction in the block
/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
/// we can switch the first register to PC and usually remove the address
-/// calculation that preceeded it.
+/// calculation that preceded it.
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
- MachineFunction::iterator MBB = JTMI->getParent();
+ MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
MachineFunction *MF = MBB->getParent();
++MBB;
@@ -2235,7 +2236,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
- MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator BBi = BB->getIterator();
MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
@@ -2258,7 +2259,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
- MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++JTBB->getIterator();
MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
@@ -2273,8 +2274,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Update the CFG.
NewBB->addSuccessor(BB);
- JTBB->removeSuccessor(BB);
- JTBB->addSuccessor(NewBB);
+ JTBB->replaceSuccessor(BB, NewBB);
++NumJTInserted;
return NewBB;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 7d41c69..c9849b2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -52,8 +52,7 @@ const char *ARMConstantPoolValue::getModifierText() const {
// strings if that's legal.
case ARMCP::no_modifier: return "none";
case ARMCP::TLSGD: return "tlsgd";
- case ARMCP::GOT: return "GOT";
- case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOT_PREL: return "GOT_PREL";
case ARMCP::GOTTPOFF: return "gottpoff";
case ARMCP::TPOFF: return "tpoff";
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 36f63e2..6b18a4e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -39,8 +39,7 @@ namespace ARMCP {
enum ARMCPModifier {
no_modifier,
TLSGD,
- GOT,
- GOTOFF,
+ GOT_PREL,
GOTTPOFF,
TPOFF
};
@@ -103,8 +102,6 @@ public:
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
- unsigned getRelocationInfo() const override { return 2; }
-
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4438f50..56f3498 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -330,22 +330,19 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- const unsigned NumEntries = array_lengthof(NEONLdStTable);
-
#ifndef NDEBUG
// Make sure the table is sorted.
static bool TableChecked = false;
if (!TableChecked) {
- for (unsigned i = 0; i != NumEntries-1; ++i)
- assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
- "NEONLdStTable is not sorted!");
+ assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
+ "NEONLdStTable is not sorted!");
TableChecked = true;
}
#endif
- const NEONLdStTableEntry *I =
- std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
- if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ auto I = std::lower_bound(std::begin(NEONLdStTable),
+ std::end(NEONLdStTable), Opcode);
+ if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
return I;
return nullptr;
}
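The lookup above is the usual sorted-static-table pattern: assert (in !NDEBUG builds) that the table is sorted, then binary-search it with std::lower_bound. A reduced, runnable version of the same pattern with a toy entry type; names and key values are illustrative, and the sortedness check runs on every call here rather than being cached behind a static flag as in the real code:

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

struct Entry {
  unsigned PseudoOpc; // sort key
  const char *Name;
  bool operator<(const Entry &RHS) const { return PseudoOpc < RHS.PseudoOpc; }
  bool operator<(unsigned Opc) const { return PseudoOpc < Opc; }
};

static const Entry Table[] = {{10, "a"}, {20, "b"}, {30, "c"}};

const Entry *lookup(unsigned Opcode) {
  assert(std::is_sorted(std::begin(Table), std::end(Table)) &&
         "table must stay sorted for lower_bound");
  auto I = std::lower_bound(std::begin(Table), std::end(Table), Opcode);
  if (I != std::end(Table) && I->PseudoOpc == Opcode)
    return I;
  return nullptr;
}

int main() {
  const Entry *E = lookup(20);
  std::printf("%s\n", E ? E->Name : "none"); // b
  return 0;
}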
@@ -734,7 +731,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16.addImm(Pred).addReg(PredReg);
if (RequiresBundling)
- finalizeBundle(MBB, &*LO16, &*MBBI);
+ finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
@@ -747,6 +744,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
switch (Opcode) {
default:
return false;
+
+ case ARM::TCRETURNdi:
+ case ARM::TCRETURNri: {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode =
+ STI->isThumb()
+ ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI->isThumb())
+ MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ return true;
+ }
case ARM::VMOVScc:
case ARM::VMOVDcc: {
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
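This TCRETURNdi/TCRETURNri expansion is the code moved out of ARMFrameLowering::fixTCReturn, which the patch deletes further down. The opcode it picks for the tail-call jump reduces to a small decision table; the sketch below models only that choice, with a plain enum standing in for the real ARM opcode constants:

#include <cstdio>

enum Opcode { tTAILJMPd, tTAILJMPdND, TAILJMPd, tTAILJMPr, TAILJMPr };

// Direct tail call (TCRETURNdi): Thumb targets use tTAILJMPd on MachO and
// tTAILJMPdND elsewhere; ARM mode always uses TAILJMPd.
Opcode directTailJump(bool IsThumb, bool IsMachO) {
  if (IsThumb)
    return IsMachO ? tTAILJMPd : tTAILJMPdND;
  return TAILJMPd;
}

// Indirect tail call through a register (TCRETURNri): only the
// Thumb/ARM split matters.
Opcode indirectTailJump(bool IsThumb) {
  return IsThumb ? tTAILJMPr : TAILJMPr;
}

int main() {
  std::printf("%d %d %d\n", directTailJump(true, true),
              directTailJump(false, false), indirectTailJump(true));
  return 0;
}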
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index fdd0763..9bdf823c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -922,12 +922,9 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
if (Addr.BaseType == Address::FrameIndexBase) {
int FI = Addr.Base.FI;
int Offset = Addr.Offset;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset),
- Flags,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
@@ -1278,8 +1275,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1303,8 +1299,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const ConstantInt *CI =
@@ -1341,8 +1336,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -1355,8 +1349,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
TII.get(Opc)).addReg(AddrReg));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
- for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
return true;
}
@@ -1860,8 +1854,9 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
- } else
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ } else {
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
case CallingConv::ARM_AAPCS_VFP:
if (!isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
@@ -2944,48 +2939,51 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned Align, MVT VT) {
- bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
- ARMConstantPoolConstant *CPV =
- ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ LLVMContext *Context = &MF->getFunction()->getContext();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
+
+ unsigned ConstAlign =
+ MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+
+ unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+ unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
- unsigned Opc;
- unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
- // Load value.
- if (isThumb2) {
- DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::t2LDRpci), DestReg1)
- .addConstantPoolIndex(Idx));
- Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
- } else {
- // The extra immediate is for addrmode2.
- DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(ARM::LDRcp), DestReg1)
- .addConstantPoolIndex(Idx).addImm(0));
- Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
- }
+ // Fix the address by adding pc.
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
+ : ARM::PICADD;
+ DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (!Subtarget->isThumb())
+ AddDefaultPred(MIB);
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- if (GlobalBaseReg == 0) {
- GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
- AFI->setGlobalBaseReg(GlobalBaseReg);
+ if (UseGOT_PREL && Subtarget->isThumb()) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
}
-
- unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
- DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
- DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
- GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc), DestReg2)
- .addReg(DestReg1)
- .addReg(GlobalBaseReg);
- if (!UseGOTOFF)
- MIB.addImm(0);
- AddOptionalDefs(MIB);
-
- return DestReg2;
+ return DestReg;
}
bool ARMFastISel::fastLowerArguments() {
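The rewritten ARMLowerPICELF replaces the old GOT/GOTOFF constant-pool scheme with a pc-relative GOT_PREL sequence. A hedged model of what it now emits is below: the mnemonic strings summarize the BuildMI calls above and are descriptive only, not assembler output (in particular, the single Thumb flag collapses the isThumb/isThumb2 distinction the real code makes for the initial load):

#include <cstdio>

void describePICLoad(bool Thumb, bool UseGOTPREL) {
  // 1) Load the constant-pool entry (the GOT_PREL offset, or the address).
  std::puts(Thumb ? "t2LDRpci TempReg, <cp-index>"
                  : "LDRcp    TempReg, <cp-index>, #0");
  // 2) Add the pc to form the address; in ARM mode a GOT_PREL access folds
  //    the GOT-slot load into PICLDR here.
  if (Thumb)
    std::puts("tPICADD  DestReg, TempReg, <pclabel>");
  else
    std::puts(UseGOTPREL ? "PICLDR   DestReg, TempReg, <pclabel>"
                         : "PICADD   DestReg, TempReg, <pclabel>");
  // 3) In Thumb mode a GOT_PREL access still needs one more load to read
  //    the GOT slot.
  if (Thumb && UseGOTPREL)
    std::puts("t2LDRi12 DestReg2, DestReg, #0");
}

int main() {
  describePICLoad(/*Thumb=*/true, /*UseGOTPREL=*/true);
  return 0;
}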
@@ -3038,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() {
}
- static const uint16_t GPRArgRegs[] = {
+ static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -3055,7 +3053,7 @@ bool ARMFastISel::fastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- updateValueMap(I, ResultReg);
+ updateValueMap(&*I, ResultReg);
}
return true;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 6744000..c5990bb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// iOS requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetIOS())
+ if (STI.isTargetIOS() || STI.isTargetWatchOS())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -288,7 +289,6 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -305,7 +305,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
@@ -489,7 +493,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -689,60 +694,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl = MBBI->getDebugLoc();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
- return;
-
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- unsigned TCOpcode = STI.isThumb() ?
- (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
- ARM::TAILJMPd;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
-
- // Add the default predicate in Thumb mode.
- if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- MBBI = NewMI;
-}
-
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +711,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
- fixTCReturn(MF, MBB);
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- }
+
+ // First put ourselves on the first (from top) terminator instruction.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +795,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- fixTCReturn(MF, MBB);
-
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
@@ -932,12 +885,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
return Offset;
}
-int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
-}
-
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -950,7 +897,6 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
SmallVector<std::pair<unsigned,bool>, 4> Regs;
unsigned i = CSI.size();
@@ -1008,7 +954,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
// Put any subsequent vpush instructions before this one: they will refer to
// higher register numbers so need to be pushed first in order to preserve
// monotonicity.
- --MI;
+ if (MI != MBB.begin())
+ --MI;
}
}
@@ -1022,12 +969,20 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
- unsigned RetOpcode = MI->getOpcode();
- bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNri);
- bool isInterrupt =
- RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ DebugLoc DL;
+ bool isTailCall = false;
+ bool isInterrupt = false;
+ bool isTrap = false;
+ if (MBB.end() != MI) {
+ DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+ isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ isTrap =
+ RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
+ RetOpcode == ARM::tTRAP;
+ }
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -1043,11 +998,14 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
- STI.hasV5TOps()) {
- Reg = ARM::PC;
- LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ !isTrap && STI.hasV5TOps()) {
+ if (MBB.succ_empty()) {
+ Reg = ARM::PC;
+ DeleteRet = true;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ } else
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
// Fold the return instruction into the LDM.
- DeleteRet = true;
}
// If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1026,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
- if (DeleteRet) {
+ if (DeleteRet && MI != MBB.end()) {
MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
@@ -1095,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
// Put any subsequent vpop instructions after this one: they will refer to
// higher register numbers so need to be popped afterwards.
- ++MI;
+ if (MI != MBB.end())
+ ++MI;
}
}
@@ -1109,7 +1068,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo &MFI = *MF.getFrameInfo();
@@ -1118,7 +1077,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// slot offsets can be wrong. The offset for d8 will always be correct.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned DNum = CSI[i].getReg() - ARM::D8;
- if (DNum >= 8)
+ if (DNum > NumAlignedDPRCS2Regs - 1)
continue;
int FI = CSI[i].getFrameIdx();
// The even-numbered registers will be 16-byte aligned, the odd-numbered
@@ -1269,7 +1228,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Find the frame index assigned to d8.
@@ -1654,13 +1613,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
- bool BigStack =
- (RS &&
- (MFI->estimateStackSize(MF) +
- ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
- estimateRSStackSizeLimit(MF, this)))
- || MFI->hasVarSizedObjects()
- || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ bool BigStack = (RS && (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
+ estimateRSStackSizeLimit(MF, this))) ||
+ MFI->hasVarSizedObjects() ||
+ (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
@@ -1698,8 +1655,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
- // Don't spill high register if the function is thumb
+ // Don't spill high registers if the function is Thumb. In the case of
+ // Windows on ARM, accept R11 (the frame pointer).
if (!AFI->isThumbFunction() ||
+ (STI.isTargetWindows() && Reg == ARM::R11) ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
@@ -1784,8 +1743,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignSPAdjust(Amount);
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
@@ -1885,7 +1843,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
report_fatal_error("Segmented stacks not supported on this platform.");
- assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
@@ -1913,21 +1870,48 @@ void ARMFrameLowering::adjustForSegmentedStacks(
MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
- e = PrologueMBB.livein_end();
- i != e; ++i) {
- AllocMBB->addLiveIn(*i);
- GetMBB->addLiveIn(*i);
- McrMBB->addLiveIn(*i);
- PrevStackMBB->addLiveIn(*i);
- PostStackMBB->addLiveIn(*i);
+ // Grab everything that reaches PrologueMBB to update their liveness as well.
+ SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+ SmallVector<MachineBasicBlock *, 2> WalkList;
+ WalkList.push_back(&PrologueMBB);
+
+ do {
+ MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+ for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+ if (BeforePrologueRegion.insert(PredBB).second)
+ WalkList.push_back(PredBB);
+ }
+ } while (!WalkList.empty());
+
+ // The order of this list is important: the blocks will all be inserted
+ // before PrologueMBB in that order, so the block that should appear first
+ // in the CFG must appear first in the list.
+ MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+ PostStackMBB};
+
+ for (MachineBasicBlock *B : AddedBlocks)
+ BeforePrologueRegion.insert(B);
+
+ for (const auto &LI : PrologueMBB.liveins()) {
+ for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+ PredBB->addLiveIn(LI);
+ }
+
+ // Remove the newly added blocks from the list, since we know
+ // we do not have to do the following updates for them.
+ for (MachineBasicBlock *B : AddedBlocks) {
+ BeforePrologueRegion.erase(B);
+ MF.insert(PrologueMBB.getIterator(), B);
}
- MF.push_front(PostStackMBB);
- MF.push_front(AllocMBB);
- MF.push_front(GetMBB);
- MF.push_front(McrMBB);
- MF.push_front(PrevStackMBB);
+ for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+ // Make sure the LiveIns are still sorted and unique.
+ MBB->sortUniqueLiveIns();
+ // Replace the edges to PrologueMBB with edges to the sequence of
+ // blocks we are about to add.
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ }
// The required stack size that is aligned to ARM constant criterion.
AlignedStackSize = alignToARMConstant(StackSize);
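Before this change the live-ins were copied only from PrologueMBB itself; with shrink wrapping the prologue block can have predecessors, so the patch walks the CFG backwards with an explicit worklist to find every block that reaches it. The same walk on a toy CFG of integer block ids (the graph below is an assumed example):

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Toy CFG: Preds[B] lists the predecessors of block B; block 4 plays the
  // role of PrologueMBB.
  std::vector<std::vector<int>> Preds = {
      /*0:*/ {}, /*1:*/ {0}, /*2:*/ {0, 1}, /*3:*/ {2}, /*4:*/ {3}};

  // Collect everything that reaches block 4, exactly like the
  // BeforePrologueRegion/WalkList pair in the patch.
  std::set<int> Region;
  std::vector<int> WalkList = {4};
  do {
    int Cur = WalkList.back();
    WalkList.pop_back();
    for (int Pred : Preds[Cur])
      if (Region.insert(Pred).second) // first time we see this block
        WalkList.push_back(Pred);
  } while (!WalkList.empty());

  for (int B : Region)
    std::printf("block %d reaches the prologue\n", B); // blocks 0..3
  return 0;
}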
@@ -1991,7 +1975,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
MachineConstantPool *MCP = MF.getConstantPool();
- unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+ unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
// ldr SR0, [pc, offset(STACK_LIMIT)]
AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index 6fdc5ef..66f4dfb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -31,8 +31,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -52,7 +50,6 @@ public:
unsigned &FrameReg) const override;
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg, int SPAdj) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -60,6 +57,11 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
+
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b110628..0242440 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -160,11 +160,6 @@ public:
// Thumb Addressing Modes:
bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
- unsigned Scale);
- bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -176,8 +171,6 @@ public:
bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
// Thumb 2 Addressing Modes:
- bool SelectT2ShifterOperandReg(SDValue N,
- SDValue &BaseReg, SDValue &Opc);
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
@@ -278,6 +271,22 @@ private:
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
bool is64BitVector);
+
+ /// Returns the number of instructions required to materialize the given
+ /// constant in a register, or 3 if a literal pool load is needed.
+ unsigned ConstantMaterializationCost(unsigned Val) const;
+
+ /// Checks if N is a multiplication by a constant where we can extract out a
+ /// power of two from the constant so that it can be used in a shift, but only
+ /// if it simplifies the materialization of the constant. Returns true if it
+ /// is, and assigns to PowerOfTwo the power of two that should be extracted
+ /// out and to NewMulConst the new constant to be multiplied by.
+ bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+ unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+ /// Replace N with M in CurDAG, in a way that also ensures that M gets
+ /// selected when N would have been selected.
+ void replaceDAGValue(const SDValue &N, SDValue M);
};
}
@@ -334,7 +343,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
bool isThumb2 = Subtarget->isThumb();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
if (N->getOpcode() != ISD::ADD)
continue;
@@ -388,7 +397,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
SDValue CPTmp1;
SDValue CPTmp2;
if (isThumb2) {
- if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
continue;
} else {
if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
@@ -471,6 +480,61 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+ if (Subtarget->isThumb()) {
+ if (Val <= 255) return 1; // MOV
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (~Val <= 255) return 2; // MOV + MVN
+ if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
+ if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
+ }
+ if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+ return 3; // Literal pool load
+}
+
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+ unsigned MaxShift,
+ unsigned &PowerOfTwo,
+ SDValue &NewMulConst) const {
+ assert(N.getOpcode() == ISD::MUL);
+ assert(MaxShift > 0);
+
+ // If the multiply is used in more than one place then changing the constant
+ // will make other uses incorrect, so don't.
+ if (!N.hasOneUse()) return false;
+ // Check if the multiply is by a constant
+ ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!MulConst) return false;
+ // If the constant is used in more than one place then modifying it will mean
+ // we need to materialize two constants instead of one, which is a bad idea.
+ if (!MulConst->hasOneUse()) return false;
+ unsigned MulConstVal = MulConst->getZExtValue();
+ if (MulConstVal == 0) return false;
+
+ // Find the largest power of 2 that MulConstVal is a multiple of
+ PowerOfTwo = MaxShift;
+ while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+ --PowerOfTwo;
+ if (PowerOfTwo == 0) return false;
+ }
+
+ // Only optimise if the new cost is better
+ unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+ NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+ unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+ unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+ return NewCost < OldCost;
+}
+
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+ CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
+ CurDAG->ReplaceAllUsesWith(N, M);
+}
+
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &Opc,
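canExtractShiftFromMul rewrites x * C as (x * C') << k when C == C' << k and C' is strictly cheaper to materialize than C. Its arithmetic core can be exercised standalone; the cost function below is a deliberately simplified stand-in for ConstantMaterializationCost that assumes a Thumb2 target with MOVW/MOVT, so it never reaches the literal-pool case:

#include <cstdio>

// Simplified materialization cost: one instruction for 16-bit immediates
// (MOV/MOVW), two for everything else (MOVW + MOVT).
unsigned cost(unsigned Val) { return Val <= 0xffff ? 1 : 2; }

// Find the largest k (capped at MaxShift) with C % (1 << k) == 0, and
// report whether C >> k is cheaper to materialize than C itself.
bool extractShift(unsigned C, unsigned MaxShift, unsigned &K, unsigned &NewC) {
  if (C == 0)
    return false;
  K = MaxShift;
  while (C % (1u << K) != 0) {
    --K;
    if (K == 0) // odd constants gain nothing, as in the real code
      return false;
  }
  NewC = C >> K;
  return cost(NewC) < cost(C);
}

int main() {
  unsigned K, NewC;
  // 0x44000 == 0x11 << 14: the shifted-out form fits in 16 bits, so the
  // multiply can become (x * 0x11) << 14.
  if (extractShift(0x44000, 31, K, NewC))
    std::printf("mul by 0x%x, then lsl #%u\n", NewC, K); // 0x11, lsl #14
  return 0;
}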
@@ -478,6 +542,24 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
if (DisableShifterOp)
return false;
+ // If N is a multiply-by-constant and it's profitable to extract a shift and
+ // use it in a shifted operand, do so.
+ if (N.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+ BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
+ N.getOperand(0), NewMulConst)
+ .getNode()),
+ 0);
+ replaceDAGValue(N.getOperand(1), NewMulConst);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+ PowerOfTwo),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
@@ -662,6 +744,18 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
}
+ // If Offset is a multiply-by-constant and it's profitable to extract a shift
+ // and use it in a shifted operand, do so.
+ if (Offset.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(Offset.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ ShOpcVal = ARM_AM::lsl;
+ }
+ }
+
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
SDLoc(N), MVT::i32);
return true;
@@ -1086,77 +1180,13 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
}
bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
- SDValue &Offset, unsigned Scale) {
- if (Scale == 4) {
- SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
- return false; // We want to select tLDRspi / tSTRspi instead.
-
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select tLDRpci instead.
- }
-
- if (!CurDAG->isBaseWithConstantOffset(N))
- return false;
-
- // Thumb does not have [sp, r] address mode.
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
- if ((LHSR && LHSR->getReg() == ARM::SP) ||
- (RHSR && RHSR->getReg() == ARM::SP))
- return false;
-
- // FIXME: Why do we explicitly check for a match here and then return false?
- // Presumably to allow something else to match, but shouldn't this be
- // documented?
- int RHSC;
- if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
- return false;
-
- Base = N.getOperand(0);
- Offset = N.getOperand(1);
- return true;
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 1);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 2);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 4);
-}
-
-bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm) {
- if (Scale == 4) {
- SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
- return false; // We want to select tLDRspi / tSTRspi instead.
-
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select tLDRpci instead.
- }
-
if (!CurDAG->isBaseWithConstantOffset(N)) {
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ if (N.getOpcode() == ISD::ADD) {
+ return false; // We want to select a register offset instead.
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@@ -1166,23 +1196,6 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return true;
}
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
- if ((LHSR && LHSR->getReg() == ARM::SP) ||
- (RHSR && RHSR->getReg() == ARM::SP)) {
- ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
- ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
- unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
- unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
-
- // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
- if (LHSC != 0 || RHSC != 0) return false;
-
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
- }
-
// If the RHS is + imm5 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
@@ -1191,9 +1204,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return true;
}
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
+ // Offset is too large, so use a register offset instead.
+ return false;
}
bool
@@ -1263,28 +1275,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
//===----------------------------------------------------------------------===//
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
- SDValue &Opc) {
- if (DisableShifterOp)
- return false;
-
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
-
- // Don't match base register only case. That is matched to a separate
- // lower complexity pattern with explicit register operand.
- if (ShOpcVal == ARM_AM::no_shift) return false;
-
- BaseReg = N.getOperand(0);
- unsigned ShImmVal = 0;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- ShImmVal = RHS->getZExtValue() & 31;
- Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
- return true;
- }
-
- return false;
-}
-
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
@@ -1425,6 +1415,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
}
}
+ // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+ // and use it in a shifted operand, do so.
+ if (OffReg.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ }
+ }
+
ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
return true;
@@ -2503,25 +2504,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
- bool UseCP = true;
- if (Subtarget->useMovt(*MF))
- // Thumb2-aware targets have the MOVT instruction, so all immediates can
- // be done with MOV + MOVT, at worst.
- UseCP = false;
- else {
- if (Subtarget->isThumb()) {
- UseCP = (Val > 255 && // MOV
- ~Val > 255 && // MOV + MVN
- !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL
- !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
- } else
- UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
- ARM_AM::getSOImmVal(~Val) == -1 && // MVN
- !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs.
- !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
- }
-
- if (UseCP) {
+ // If materializing the constant would take more than two instructions, we
+ // must load it from a literal pool instead.
+ if (ConstantMaterializationCost(Val) > 2) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
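
ConstantMaterializationCost itself is defined outside this hunk; a hedged reconstruction of the ARM-mode costs, read off the deleted UseCP logic above (the real function also has Thumb-specific cases):

    // 1: single MOV/MVN/MOVW; 2: two-instruction build (two-part SOImm or
    // MOVW+MOVT); 3: give up and load from the literal pool, matching the
    // "> 2" threshold used above.
    static unsigned armConstantCostSketch(uint32_t Val, bool HasV6T2) {
      if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
        return 1;
      if (HasV6T2 && Val <= 0xffff)
        return 1;
      if (ARM_AM::isSOImmTwoPartVal(Val) || HasV6T2)
        return 2;
      return 3;
    }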
@@ -3376,7 +3360,7 @@ static void getIntOperandsFromRegisterString(StringRef RegString,
SelectionDAG *CurDAG, SDLoc DL,
std::vector<SDValue>& Ops) {
SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ":");
+ RegString.split(Fields, ':');
if (Fields.size() > 1) {
bool AllIntFields = true;
@@ -3461,9 +3445,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags) {
+static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
if (Flags.empty())
- return 0x3;
+ return 0x2 | (int)hasDSP;
return StringSwitch<int>(Flags)
.Case("g", 0x1)
@@ -3492,7 +3476,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
}
// We know we are now handling a write, so we need to get the mask for the flags.
- int Mask = getMClassFlagsMask(Flags);
+ int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
// Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
// shouldn't have flags present.
@@ -3501,7 +3485,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
// The _g and _nzcvqg versions are only valid if the DSP extension is
// available.
- if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+ if (!Subtarget->hasDSP() && (Mask & 0x1))
return -1;
// The register was valid, so we need to put the mask in the correct place
@@ -3523,7 +3507,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// The flags permitted for apsr are the same flags that are allowed in
// M class registers. We get the flag value and then shift the flags into
// the correct place to combine with the mask.
- Mask = getMClassFlagsMask(Flags);
+ Mask = getMClassFlagsMask(Flags, true);
if (Mask == -1)
return -1;
return Mask << 2;
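
Concretely, the empty-flags default now includes the DSP-controlled bit only when the extension is present; reading the bit assignments off the surrounding code ("g" maps to 0x1, and the DSP check above tests Mask & 0x1):

    // getMClassFlagsMask("",  /*hasDSP=*/false) == 0x2  (nzcvq only)
    // getMClassFlagsMask("",  /*hasDSP=*/true)  == 0x3  (nzcvq and g)
    // getMClassFlagsMask("g", hasDSP)           == 0x1  (later rejected
    //                                                    when !hasDSP)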
@@ -3742,7 +3726,7 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
}
SmallVector<StringRef, 5> Fields;
- StringRef(SpecialReg).split(Fields, "_", 1, false);
+ StringRef(SpecialReg).split(Fields, '_', 1, false);
std::string Reg = Fields[0].str();
StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
@@ -3943,6 +3927,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
// be an immediate and not a memory constraint.
// Fallthrough.
case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
case InlineAsm::Constraint_Um:
case InlineAsm::Constraint_Un:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8cc06df..9cfb06b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -142,6 +142,11 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+
+ if (!VT.isFloatingPoint() &&
+ VT != MVT::v2i64 && VT != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -166,77 +171,78 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
- // Single-precision floating-point arithmetic.
- setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
- setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
- setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
- setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
-
- // Double-precision floating-point arithmetic.
- setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
- setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
- setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
- setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
-
- // Single-precision comparisons.
- setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
- setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
- setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
- setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
- setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
- setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
- setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
- setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
-
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
-
- // Double-precision comparisons.
- setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
- setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
- setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
- setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
- setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
- setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
- setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
- setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
-
- setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
-
- // Floating-point to integer conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
-
- // Conversions between floating types.
- setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Single-precision floating-point arithmetic.
+ { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
+
+ // Double-precision floating-point arithmetic.
+ { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
+
+ // Single-precision comparisons.
+ { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
+ { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
+ { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
+
+ // Double-precision comparisons.
+ { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
+ { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
+ { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
- // Integer to floating-point conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- // FIXME: There appears to be some naming inconsistency in ARM libgcc:
- // e.g., __floatunsidf vs. __floatunssidfvfp.
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ // Set the correct calling convention for ARMv7k WatchOS. It's just
+ // AAPCS_VFP for everything, even for functions as simple as libcalls.
+ if (Subtarget->isTargetWatchOS()) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+ setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
}
@@ -245,8 +251,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
- !Subtarget->isTargetWindows()) {
+ // RTLIB routines for AEABI-conforming targets.
+ if (Subtarget->isAAPCS_ABI() &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+ Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -334,12 +342,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Memory operations
- // RTABI chapter 4.3.4
- { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
@@ -348,6 +350,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
+
+ // EABI dependent RTLIB
+ if (TM.Options.EABIVersion == EABI::EABI4 ||
+ TM.Options.EABIVersion == EABI::EABI5) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char *const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } MemOpsLibraryCalls[] = {
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : MemOpsLibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
}
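
For reference, the RTABI 4.3.4 routines registered above are not drop-in aliases for the ISO functions: they return void, and __aeabi_memset takes (dest, n, c) rather than (dest, c, n), which is why they need dedicated RTLIB entries with the AAPCS calling convention. Their documented prototypes:

    void __aeabi_memcpy (void *dest, const void *src, size_t n);
    void __aeabi_memmove(void *dest, const void *src, size_t n);
    void __aeabi_memset (void *dest, size_t n, int c);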
if (Subtarget->isTargetWindows()) {
@@ -364,6 +390,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
@@ -373,8 +403,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().isiOS() &&
- !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ if (Subtarget->isTargetWatchOS() ||
+ (Subtarget->isTargetIOS() &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
@@ -392,6 +423,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
}
+ // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+ // a __gnu_ prefix (which is the default).
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+ setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+ }
+
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
@@ -579,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
@@ -605,7 +643,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ADDC);
if (Subtarget->isFPOnlySP()) {
- // When targetting a floating-point unit with only single-precision
+ // When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present. However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.
@@ -689,7 +727,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
- || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -706,8 +744,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
// ARM does not have ROTL.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ }
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
@@ -717,7 +762,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+ // @llvm.readcyclecounter requires the Performance Monitors extension.
+ // Default to the 0 expansion on unsupported platforms.
+ // FIXME: Technically there are older ARM CPUs that have
+ // implementation-specific ways of obtaining this information.
+ if (Subtarget->hasPerfMon())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
@@ -726,15 +776,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
!(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, LibCall);
+ setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- // FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
- if (Subtarget->isTargetAEABI()) {
+ if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+
setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
@@ -762,7 +814,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
@@ -776,13 +827,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (!Subtarget->isTargetMachO()) {
- // Non-MachO platforms may return values in these registers via the
- // personality function.
- setExceptionPointerRegister(ARM::R0);
- setExceptionSelectorRegister(ARM::R1);
- }
-
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
@@ -849,11 +893,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget->isTargetDarwin()) {
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+ if (Subtarget->useSjLjEH())
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
- }
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -912,7 +956,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->getTargetTriple().isiOS()) {
+ if (Subtarget->isTargetWatchOS()) {
+ setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
+ setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
+ }
+ if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
// For iOS and watchOS, we don't want the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -928,6 +976,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
if (!Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
@@ -935,8 +990,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
+
+ if (Subtarget->hasNEON()) {
+ // vmin and vmax aren't available in a scalar form, so we use
+ // a NEON instruction with an undef lane instead.
+ setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ }
+
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
@@ -959,11 +1028,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -1054,8 +1123,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::RBIT: return "ARMISD::RBIT";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1069,7 +1136,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
- case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -1082,6 +1150,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
+ case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
@@ -1133,14 +1202,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
- case ARMISD::FMAX: return "ARMISD::FMAX";
- case ARMISD::FMIN: return "ARMISD::FMIN";
- case ARMISD::VMAXNM: return "ARMISD::VMAX";
- case ARMISD::VMINNM: return "ARMISD::VMIN";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
@@ -1449,9 +1515,10 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
- return DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(LocMemOffset),
- false, false, 0);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
@@ -1734,9 +1801,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1748,9 +1816,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
@@ -1768,7 +1837,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(), false, false, true, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1781,7 +1851,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
@@ -1804,9 +1875,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
@@ -1821,7 +1893,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1831,8 +1902,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
- // Emit regular call when code size is the priority
- !HasMinSizeAttr)
+ // Emit regular call when code size is the priority
+ !MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -2014,6 +2085,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
+ assert(Subtarget->supportsTailCall());
+
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
@@ -2033,26 +2106,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
- // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
- // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
- // support in the assembler and linker to be used. This would need to be
- // fixed to fully support tail calls in Thumb1.
- //
- // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
- // LR. This means if we need to reload LR, it takes an extra instructions,
- // which outweighs the value of the tail call; but here we don't know yet
- // whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
- // emitEpilogue if LR is used.
-
- // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
- // but we need to make sure there are enough registers; the only valid
- // registers are the 4 used for parameters. We don't currently do this
- // case.
- if (Subtarget->isThumb1Only())
- return false;
-
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
@@ -2400,7 +2453,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
- return !Subtarget->isThumb1Only();
+ return true;
}
// Trying to write a 64-bit value, so we need to split it into two 32-bit values first,
@@ -2467,9 +2520,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result =
+ DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
@@ -2491,9 +2545,10 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
- Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Argument =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2543,17 +2598,19 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
} else {
// local exec model
assert(model == TLSModel::LocalExec);
@@ -2561,9 +2618,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -2577,6 +2635,8 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -2597,22 +2657,31 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV,
- UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
- SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
- Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
- if (!UseGOTOFF)
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ if (UseGOT_PREL)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
return Result;
}
@@ -2628,9 +2697,10 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ return DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
}
@@ -2654,7 +2724,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
@@ -2680,32 +2751,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
-SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetELF() &&
- "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc dl(Op);
- unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV =
- ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex, PCAdj);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
- return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
-}
-
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -2722,6 +2772,13 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}
+SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
+ Op.getOperand(0));
+}
+
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
@@ -2732,7 +2789,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_rbit: {
assert(Op.getOperand(1).getValueType() == MVT::i32 &&
"RBIT intrinsic must have i32 type!");
- return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
+ return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
}
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2752,10 +2809,10 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2770,6 +2827,36 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vminnm:
+ case Intrinsic::arm_neon_vmaxnm: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
+ ? ISD::FMINNUM : ISD::FMAXNUM;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vminu:
+ case Intrinsic::arm_neon_vmaxu: {
+ if (Op.getValueType().isFloatingPoint())
+ return SDValue();
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
+ ? ISD::UMIN : ISD::UMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vmins:
+ case Intrinsic::arm_neon_vmaxs: {
+ // v{min,max}s is overloaded between signed integers and floats.
+ if (!Op.getValueType().isFloatingPoint()) {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::SMIN : ISD::SMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::FMINNAN : ISD::FMAXNAN;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
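
Worked instances of the dispatch above, showing where the overloaded NEON intrinsics land (element types chosen as examples):

    // llvm.arm.neon.vminu.v4i32  -> ISD::UMIN
    // llvm.arm.neon.vmins.v4i32  -> ISD::SMIN
    // llvm.arm.neon.vmins.v4f32  -> ISD::FMINNAN (NEON VMIN.F32 semantics)
    // llvm.arm.neon.vminnm.v4f32 -> ISD::FMINNUM (IEEE-754 minNum semantics)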
@@ -2870,9 +2957,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(
+ MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -3056,9 +3144,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(
+ MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -3139,9 +3228,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex, VA.getLocMemOffset(),
- Flags.getByValSize());
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
+ VA.getLocMemOffset(), Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
@@ -3151,9 +3240,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+ InVals.push_back(DAG.getLoad(
+ VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0));
}
lastInsIndex = index;
}
@@ -3188,13 +3278,9 @@ static bool isFloatingPointZero(SDValue Op) {
// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
// created by LowerConstantFP().
SDValue BitcastOp = Op->getOperand(0);
- if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
- SDValue MoveOp = BitcastOp->getOperand(0);
- if (MoveOp->getOpcode() == ISD::TargetConstant &&
- cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
- return true;
- }
- }
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(BitcastOp->getOperand(0)))
+ return true;
}
return false;
}
@@ -3559,113 +3645,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
- // We can use VMAXNM/VMINNM for a compare followed by a select with the
- // same operands, as follows:
- // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
- // select c, a, b
- // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
- bool swapSides = false;
- if (!getTargetMachine().Options.NoNaNsFPMath) {
- // transformability may depend on which way around we compare
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- // the non-NaN should be RHS
- swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS);
- break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULT:
- case ISD::SETULE:
- // the non-NaN should be LHS
- swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS);
- break;
- }
- }
- swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal);
- if (swapSides) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(LHS, RHS);
- }
- if (LHS == TrueVal && RHS == FalseVal) {
- bool canTransform = true;
- // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here
- if (!getTargetMachine().Options.UnsafeFPMath &&
- !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- const ConstantFPSDNode *Zero;
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- case ISD::SETGT:
- // RHS must not be -0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
- !Zero->isNegative();
- break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- case ISD::SETGE:
- // LHS must not be -0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
- !Zero->isNegative();
- break;
- case ISD::SETOLT:
- case ISD::SETULT:
- case ISD::SETLT:
- // RHS must not be +0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
- Zero->isNegative();
- break;
- case ISD::SETOLE:
- case ISD::SETULE:
- case ISD::SETLE:
- // LHS must not be +0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
- Zero->isNegative();
- break;
- }
- }
- if (canTransform) {
- // Note: If one of the elements in a pair is a number and the other
- // element is NaN, the corresponding result element is the number.
- // This is consistent with the IEEE 754-2008 standard.
- // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETOGE:
- if (!DAG.isKnownNeverNaN(RHS))
- break;
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
- case ISD::SETUGT:
- case ISD::SETUGE:
- if (!DAG.isKnownNeverNaN(LHS))
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
- case ISD::SETOLT:
- case ISD::SETOLE:
- if (!DAG.isKnownNeverNaN(RHS))
- break;
- return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
- case ISD::SETULT:
- case ISD::SETULE:
- if (!DAG.isKnownNeverNaN(LHS))
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
- }
- }
- }
-
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -3890,16 +3869,18 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
Addr, Op.getOperand(2), JTI);
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(),
- false, false, false, 0);
+ Addr =
+ DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
- Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(),
- false, false, false, 0);
+ Addr =
+ DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
@@ -3936,7 +3917,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
- return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
@@ -3988,7 +3969,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
- return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
@@ -4153,6 +4134,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
Results.push_back(Read.getOperand(0));
}
+/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
+/// When \p DstVT, the destination type of \p BC, is on the vector
+/// register bank and the source of the bitcast, \p Op, operates on the same bank,
+/// it might be possible to combine them, such that everything stays on the
+/// vector register bank.
+/// \returns The node that would replace \p BC, if the combine
+/// is possible.
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
+ SelectionDAG &DAG) {
+ SDValue Op = BC->getOperand(0);
+ EVT DstVT = BC->getValueType(0);
+
+ // The only vector instruction that can produce a scalar (remember,
+ // since the bitcast was about to be turned into VMOVDRR, the source
+ // type is i64) from a vector is EXTRACT_VECTOR_ELT.
+ // Moreover, we can do this combine only if there is one use.
+ // Finally, if the destination type is not a vector, there is not
+ // much point in forcing everything on the vector bank.
+ if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !Op.hasOneUse())
+ return SDValue();
+
+ // If the index is not constant, we will introduce an additional
+ // multiply that will stick.
+ // Give up in that case.
+ ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Index)
+ return SDValue();
+ unsigned DstNumElt = DstVT.getVectorNumElements();
+
+ // Compute the new index.
+ const APInt &APIntIndex = Index->getAPIntValue();
+ APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
+ NewIndex *= APIntIndex;
+ // Check if the new constant index fits into i32.
+ if (NewIndex.getBitWidth() > 32)
+ return SDValue();
+
+ // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
+ // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
+ SDLoc dl(Op);
+ SDValue ExtractSrc = Op.getOperand(0);
+ EVT VecVT = EVT::getVectorVT(
+ *DAG.getContext(), DstVT.getScalarType(),
+ ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
+ SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
+ DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
+}
+
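
Following the formula in the comment, a concrete instance with illustrative values:

    // DstVT = v8i8, source = v2i64, index = 1:
    //   v8i8 (bitcast (i64 (extractelt (v2i64 %src), i32 1)))
    // gives DstNumElt = 8, NewIndex = 8 * 1 = 8, VecVT = v16i8, hence:
    //   v8i8 (extract_subvector (v16i8 (bitcast %src)), i32 8)
    // and the value never has to visit the general-purpose registers.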
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -4172,6 +4203,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
+ // if we can combine the bitcast with its source.
+ if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
+ return Val;
+
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
@@ -4383,7 +4419,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
if (!ST->hasV6T2Ops())
return SDValue();
- SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
@@ -4544,8 +4580,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
- if (!isa<ConstantSDNode>(N->getOperand(1)) ||
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ if (!isOneConstant(N->getOperand(1)))
return SDValue();
// If we are in thumb mode, we don't have RRX.
@@ -5036,18 +5071,56 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
return VT == MVT::v8i8 && M.size() == 8;
}
+// Checks whether the shuffle mask represents a vector transpose (VTRN) by
+// checking that pairs of elements in the shuffle mask represent the same index
+// in each vector, incrementing the expected index by 2 at each step.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
+// v2={e,f,g,h}
+// WhichResult gives the offset for each element in the mask based on which
+// of the two results it belongs to.
+//
+// The transpose can be represented either as:
+// result1 = shufflevector v1, v2, result1_shuffle_mask
+// result2 = shufflevector v1, v2, result2_shuffle_mask
+// where v1/v2 and the shuffle masks have the same number of elements
+// (here WhichResult (see below) indicates which result is being checked)
+//
+// or as:
+// results = shufflevector v1, v2, shuffle_mask
+// where both results are returned in one vector and the shuffle mask has twice
+// as many elements as v1/v2 (here WhichResult will always be 0 if true). In
+// this case we want to check the low half and the high half of the shuffle
+// mask as if each were a mask of the first form.
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ // If the mask is twice as long as the input vector then we need to check the
+ // upper and lower parts of the mask with a matching value for WhichResult
+ // FIXME: A mask with only even values will be rejected in case the first
+ // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
+ // M[0] is used to determine WhichResult
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
+ return false;
+ }
}
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
return true;
}
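
A worked v4i32 instance of both mask forms the rewritten loop accepts:

    // Single-result masks:
    //   [0, 4, 2, 6]  first VTRN result  (WhichResult == 0)
    //   [1, 5, 3, 7]  second VTRN result (WhichResult == 1)
    // Doubled, both-results mask, checked one half at a time:
    //   [0, 4, 2, 6, 1, 5, 3, 7]  (WhichResult normalized to 0)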
@@ -5060,28 +5133,55 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
+ return false;
+ }
}
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
return true;
}
+// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
+// that the mask elements are either all even and in steps of size 2 or all odd
+// and in steps of size 2.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
+// v2={e,f,g,h}
+// Requires checks similar to those of isVTRNMask with respect to how the
+// results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (M[i] < 0) continue; // ignore UNDEF indices
- if ((unsigned) M[i] != 2 * i + WhichResult)
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; ++j) {
+ if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
+ return false;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5097,18 +5197,27 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
if (EltSz == 64)
return false;
- unsigned Half = VT.getVectorNumElements() / 2;
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned j = 0; j != 2; ++j) {
- unsigned Idx = WhichResult;
- for (unsigned i = 0; i != Half; ++i) {
- int MIdx = M[i + j * Half];
- if (MIdx >= 0 && (unsigned) MIdx != Idx)
- return false;
- Idx += 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ unsigned Half = NumElts / 2;
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += Half) {
+ unsigned Idx = WhichResult;
+ for (unsigned k = 0; k < Half; ++k) {
+ int MIdx = M[i + j + k];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
}
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5116,21 +5225,37 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
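
The VUZP shape described in the comment further above can be checked with the same kind of standalone model (illustrative C++, not the LLVM API):

#include <cassert>
#include <vector>

// Model of the NumElts-length VUZP case: lane j must read element
// 2 * j + WhichResult; -1 marks an undef lane.
static bool isVUZPMaskModel(const std::vector<int> &M, unsigned NumElts,
                            unsigned &WhichResult) {
  WhichResult = (M[0] == 0) ? 0 : 1;
  for (unsigned j = 0; j < NumElts; ++j)
    if (M[j] >= 0 && (unsigned)M[j] != 2 * j + WhichResult)
      return false;
  return true;
}

int main() {
  unsigned Which;
  assert(isVUZPMaskModel({0, 2, 4, 6}, 4, Which) && Which == 0);  // even lanes
  assert(isVUZPMaskModel({1, -1, 5, 7}, 4, Which) && Which == 1); // undef is ok
}
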
+// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
+// that pairs of elements of the shufflemask represent the same index in each
+// vector incrementing sequentially through the vectors.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
+// v2={e,f,g,h}
+// Requires similar checks to those of isVTRNMask with respect to how results
+// are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
- return false;
- Idx += 1;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
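
And the VZIP shape, in the same standalone style (test masks assumed):

#include <cassert>
#include <vector>

// Model of the NumElts-length VZIP case: pairs (Idx, Idx + NumElts) with Idx
// starting at WhichResult * NumElts / 2 and incrementing once per pair.
static bool isVZIPMaskModel(const std::vector<int> &M, unsigned NumElts,
                            unsigned &WhichResult) {
  WhichResult = (M[0] == 0) ? 0 : 1;
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2) {
    if ((M[j] >= 0 && (unsigned)M[j] != Idx) ||
        (M[j + 1] >= 0 && (unsigned)M[j + 1] != Idx + NumElts))
      return false;
    ++Idx;
  }
  return true;
}

int main() {
  unsigned Which;
  assert(isVZIPMaskModel({0, 4, 1, 5}, 4, Which) && Which == 0); // low halves
  assert(isVZIPMaskModel({2, 6, 3, 7}, 4, Which) && Which == 1); // high halves
}
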
@@ -5147,15 +5272,23 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
- return false;
- Idx += 1;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
+ return false;
+ Idx += 1;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5329,16 +5462,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// just use VDUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the VDUP from lane instructions only have
// constant-index forms.
+ ConstantSDNode *constIndex;
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Value->getOperand(1))) {
+ (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
- ConstantSDNode *constIndex;
- constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
- assert(constIndex && "The index is not a constant!");
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
@@ -5437,14 +5568,35 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
- SmallVector<SDValue, 2> SourceVecs;
- SmallVector<unsigned, 2> MinElts;
- SmallVector<unsigned, 2> MaxElts;
+ struct ShuffleSourceInfo {
+ SDValue Vec;
+ unsigned MinElt;
+ unsigned MaxElt;
+
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
+ // be compatible with the shuffle we intend to construct. As a result
+ // ShuffleVec will be some sliding window into the original Vec.
+ SDValue ShuffleVec;
+
+    // Code should guarantee that element i in Vec starts at element
+    // "WindowBase + i * WindowScale" in ShuffleVec.
+ int WindowBase;
+ int WindowScale;
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ ShuffleSourceInfo(SDValue Vec)
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
+ WindowScale(1) {}
+ };
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR
+ // node.
+ SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
@@ -5453,127 +5605,166 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
- } else if (V.getOperand(0).getValueType().getVectorElementType() !=
- VT.getVectorElementType()) {
- // This code doesn't know how to handle shuffles where the vector
- // element types do not match (this happens because type legalization
- // promotes the return type of EXTRACT_VECTOR_ELT).
- // FIXME: It might be appropriate to extend this code to handle
- // mismatched types.
+ } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
+ // Furthermore, shuffles require a constant mask, whereas extractelts
+ // accept variable indices.
return SDValue();
}
- // Record this extraction against the appropriate vector if possible...
+ // Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- // If the element number isn't a constant, we can't effectively
- // analyze what's going on.
- if (!isa<ConstantSDNode>(V.getOperand(1)))
- return SDValue();
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
- bool FoundSource = false;
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {
- if (SourceVecs[j] == SourceVec) {
- if (MinElts[j] > EltNo)
- MinElts[j] = EltNo;
- if (MaxElts[j] < EltNo)
- MaxElts[j] = EltNo;
- FoundSource = true;
- break;
- }
- }
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ if (Source == Sources.end())
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
- // Or record a new source if not...
- if (!FoundSource) {
- SourceVecs.push_back(SourceVec);
- MinElts.push_back(EltNo);
- MaxElts.push_back(EltNo);
- }
+ // Update the minimum and maximum lane number seen.
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ Source->MinElt = std::min(Source->MinElt, EltNo);
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
- // involved.
- if (SourceVecs.size() > 2)
+ // are involved.
+ if (Sources.size() > 2)
return SDValue();
- SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
- int VEXTOffsets[2] = {0, 0};
+ // Find out the smallest element size among result and two sources, and use
+ // it as element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (auto &Source : Sources) {
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy))
+ SmallestEltTy = SrcEltTy;
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+
+ // If the source vector is too wide or too narrow, we may nevertheless be able
+ // to construct a compatible shuffle either by concatenating it with UNDEF or
+ // extracting a suitable range of elements.
+ for (auto &Src : Sources) {
+ EVT SrcVT = Src.ShuffleVec.getValueType();
+
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())
+ continue;
+
+ // This stage of the search produces a source with the same element type as
+ // the original, but with a total width matching the BUILD_VECTOR output.
+ EVT EltVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- // This loop extracts the usage patterns of the source vectors
- // and prepares appropriate SDValues for a shuffle if possible.
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
- // No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
- VEXTOffsets[i] = 0;
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
+ if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+ // We can pad out the smaller vector for free, so if it's part of a
+ // shuffle...
+ Src.ShuffleVec =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
- // It probably isn't worth padding out a smaller vector just to
- // break it down again in a shuffle.
- return SDValue();
}
- // Since only 64-bit and 128-bit vectors are legal on ARM and
- // we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
- "unexpected vector sizes in ReconstructShuffle");
+ if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
+ return SDValue();
- if (MaxElts[i] - MinElts[i] >= NumElts) {
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
- if (MinElts[i] >= NumElts) {
+ if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
- VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(NumElts, dl));
- } else if (MaxElts[i] < NumElts) {
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
- VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(0, dl));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
} else {
// An actual VEXT is needed
- VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(0, dl));
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(NumElts, dl));
- ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(VEXTOffsets[i], dl,
- MVT::i32));
- }
- }
-
- SmallVector<int, 8> Mask;
-
- for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue VEXTSrc1 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+
+ Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
+ VEXTSrc2,
+ DAG.getConstant(Src.MinElt, dl, MVT::i32));
+ Src.WindowBase = -Src.MinElt;
+ }
+ }
+
+ // Another possible incompatibility occurs from the vector element types. We
+ // can fix this by bitcasting the source vectors to the same type we intend
+ // for the shuffle.
+ for (auto &Src : Sources) {
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
+ if (SrcEltTy == SmallestEltTy)
+ continue;
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowBase *= Src.WindowScale;
+ }
+
+ // Final sanity check before we try to actually produce a shuffle.
+ DEBUG(
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ );
+
+  // The stars all align; our next step is to produce the mask for the shuffle.
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
+ if (Entry.getOpcode() == ISD::UNDEF)
continue;
- }
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
- .getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
- }
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+
+    // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an
+    // implicit trunc, so only std::min(SrcBits, DestBits) bits actually get
+    // defined in this segment.
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ VT.getVectorElementType().getSizeInBits());
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;
+
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,
+ // starting at the appropriate offset.
+ int *LaneMask = &Mask[i * ResMultiplier];
+
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
+ ExtractBase += NumElts * (Src - Sources.begin());
+ for (int j = 0; j < LanesDefined; ++j)
+ LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ return SDValue();
- return SDValue();
+ // We can't handle more than two sources. This should have already
+ // been checked before this point.
+ assert(Sources.size() <= 2 && "Too many sources!");
+
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ for (unsigned i = 0; i < Sources.size(); ++i)
+ ShuffleOps[i] = Sources[i].ShuffleVec;
+
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
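
To make the window bookkeeping concrete, here is a hedged worked example (all values assumed, little-endian lane numbering): a BUILD_VECTOR of v4i16 taking lanes 0 and 1 of a v2i32 source, with the other two operands undef. SmallestEltTy is i16, so the source is bitcast to v4i16, giving WindowScale = 2, WindowBase = 0 and ResMultiplier = 1, and each i32 extract defines min(32, 16) = 16 bits, i.e. one shuffle lane:

#include <cstdio>

// Standalone trace of the mask-building loop above for the assumed example.
int main() {
  const int WindowScale = 2, WindowBase = 0, ResMultiplier = 1;
  const int BitsPerShuffleLane = 16, BitsDefined = 16;
  const int EltNos[4] = {0, -1, 1, -1}; // -1 marks an undef BUILD_VECTOR operand
  int Mask[4] = {-1, -1, -1, -1};
  for (int i = 0; i < 4; ++i) {
    if (EltNos[i] < 0)
      continue;
    int ExtractBase = EltNos[i] * WindowScale + WindowBase;
    int LanesDefined = BitsDefined / BitsPerShuffleLane; // = 1 here
    for (int j = 0; j < LanesDefined; ++j)
      Mask[i * ResMultiplier + j] = ExtractBase + j;
  }
  for (int m : Mask)
    printf("%d ", m); // prints: 0 -1 2 -1
  printf("\n");
}
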
/// isShuffleMaskLegal - Targets can use this to indicate that they only
@@ -6235,6 +6426,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -6265,6 +6458,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -6337,6 +6532,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
@@ -6445,45 +6641,56 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
-
- // Create stack object for sret.
+ Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
auto &DL = DAG.getDataLayout();
- const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
- const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
- SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
ArgListTy Args;
- ArgListEntry Entry;
-
- Entry.Node = SRet;
- Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isSRet = true;
- Args.push_back(Entry);
+ bool ShouldUseSRet = Subtarget->isAPCS_ABI();
+ SDValue SRet;
+ if (ShouldUseSRet) {
+ // Create stack object for sret.
+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
+
+ ArgListEntry Entry;
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(*DAG.getContext());
+ }
+ ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
- const char *LibcallName = (ArgVT == MVT::f64)
- ? "__sincos_stret" : "__sincosf_stret";
+ const char *LibcallName =
+ (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+ RTLIB::Libcall LC =
+ (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+ CallingConv::ID CC = getLibcallCallingConv(LC);
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
- std::move(Args), 0)
- .setDiscardResult();
-
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+ .setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ if (!ShouldUseSRet)
+ return CallResult.first;
+
SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
MachinePointerInfo(), false, false, false, 0);
@@ -6498,6 +6705,85 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
LoadSin.getValue(0), LoadCos.getValue(0));
}
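
For orientation, the helpers targeted here are __sincos_stret / __sincosf_stret. A hedged C++ sketch of the two call shapes the code distinguishes; the struct layout is inferred from the StructType built above, and the *_apcs prototype is purely illustrative, since the real symbol is a single function whose convention depends on the target ABI:

// Pair-of-floats result, mirroring StructType::get(ArgTy, ArgTy) above.
struct SinCosF { float Sin, Cos; };

// AAPCS path (ShouldUseSRet == false): the pair comes back in registers.
extern "C" SinCosF __sincosf_stret(float X);

// APCS path (ShouldUseSRet == true): the callee writes through a hidden sret
// pointer and the call itself is void.  Illustrative prototype only.
extern "C" void __sincosf_stret_apcs(SinCosF *Ret, float X);

float SumSinCos(float X) {
  SinCosF R = __sincosf_stret(X);
  return R.Sin + R.Cos;
}
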
+SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
+ bool Signed,
+ SDValue &Chain) const {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ const char *Name = nullptr;
+ if (Signed)
+ Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
+ else
+ Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
+
+ SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
+
+ ARMTargetLowering::ArgListTy Args;
+
+ for (auto AI : {1, 0}) {
+ ArgListEntry Arg;
+ Arg.Node = Op.getOperand(AI);
+ Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Arg);
+ }
+
+ CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
+ ES, std::move(Args), 0);
+
+ return LowerCallTo(CLI).first;
+}
+
+SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ assert(Op.getValueType() == MVT::i32 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
+ DAG.getEntryNode(), Op.getOperand(1));
+
+ return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+}
+
+void ARMTargetLowering::ExpandDIV_Windows(
+ SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const {
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ assert(Op.getValueType() == MVT::i64 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
+
+ SDValue DBZCHK =
+ DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+
+ SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+
+ SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
+ SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
+ DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
+ Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
+
+ Results.push_back(Lower);
+ Results.push_back(Upper);
+}
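
The Lower/Upper computation at the end is the usual 64-to-2x32 split; a scalar model in plain C++ (quotient value assumed):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Result = 0x0000000500000007ull; // 64-bit quotient from the libcall
  uint32_t Lower = (uint32_t)Result;         // ISD::TRUNCATE
  uint32_t Upper = (uint32_t)(Result >> 32); // ISD::SRL by 32, then TRUNCATE
  assert(Lower == 7 && Upper == 5);
}
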
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -6513,36 +6799,22 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc DL(N);
- SDValue Cycles32, OutChain;
-
- if (Subtarget->hasPerfMon()) {
- // Under Power Management extensions, the cycle-count is:
- // mrc p15, #0, <Rt>, c9, c13, #0
- SDValue Ops[] = { N->getOperand(0), // Chain
- DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
- DAG.getConstant(15, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(9, DL, MVT::i32),
- DAG.getConstant(13, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32)
- };
-
- Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
- DAG.getVTList(MVT::i32, MVT::Other), Ops);
- OutChain = Cycles32.getValue(1);
- } else {
- // Intrinsic is defined to return 0 on unsupported platforms. Technically
- // there are older ARM CPUs that have implementation-specific ways of
- // obtaining this information (FIXME!).
- Cycles32 = DAG.getConstant(0, DL, MVT::i32);
- OutChain = DAG.getEntryNode();
- }
-
+ // Under Power Management extensions, the cycle-count is:
+ // mrc p15, #0, <Rt>, c9, c13, #0
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(9, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32)
+ };
- SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
- Cycles32, DAG.getConstant(0, DL, MVT::i32));
- Results.push_back(Cycles64);
- Results.push_back(OutChain);
+ SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
+ DAG.getConstant(0, DL, MVT::i32)));
+ Results.push_back(Cycles32.getValue(1));
}
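
The intrinsic operand list spells out the coprocessor access named in the comment; as GNU inline assembly it would read roughly as below (a sketch that compiles only when targeting ARM and assumes user-mode access to the PMU cycle counter has been enabled):

#include <cstdint>

// Reads PMCCNTR, the PMU cycle counter:
//   mrc p15, #0, <Rt>, c9, c13, #0
static inline uint32_t ReadCycleCounter() {
  uint32_t Cycles;
  __asm__ __volatile__("mrc p15, #0, %0, c9, c13, #0" : "=r"(Cycles));
  return Cycles;
}
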
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -6576,15 +6848,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SREM: return LowerREM(Op.getNode(), DAG);
+ case ISD::UREM: return LowerREM(Op.getNode(), DAG);
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
@@ -6622,13 +6896,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ARMISD::WIN__DBZCHK: return SDValue();
}
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
+ SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res;
switch (N->getOpcode()) {
@@ -6644,9 +6919,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = LowerREM(N, DAG);
+ break;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::UDIV:
+ case ISD::SDIV:
+ assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+ return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+ Results);
}
if (Res.getNode())
Results.push_back(Res);
@@ -6683,12 +6967,12 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
- MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
- MachineMemOperand::MOLoad, 4, 4);
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
MachineMemOperand *FIMMOSt =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore, 4, 4);
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore, 4, 4);
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
@@ -6792,7 +7076,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
MachineModuleInfo &MMI = MF->getMMI();
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
- if (!BB->isLandingPad()) continue;
+ if (!BB->isEHPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
@@ -6807,7 +7091,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
- CallSiteNumToLPad[*CSI].push_back(BB);
+ CallSiteNumToLPad[*CSI].push_back(&*BB);
MaxCSNum = std::max(MaxCSNum, *CSI);
}
break;
@@ -6840,7 +7124,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
- DispatchBB->setIsLandingPad();
+ DispatchBB->setIsEHPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
@@ -6864,10 +7148,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
- MachineMemOperand *FIMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad |
- MachineMemOperand::MOVolatile, 4, 4);
+ MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
MachineInstrBuilder MIB;
MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
@@ -6982,9 +7265,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3));
- MachineMemOperand *JTMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
@@ -7066,9 +7348,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI));
- MachineMemOperand *JTMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
@@ -7109,13 +7390,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
BB->succ_end());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
- if (SMBB->isLandingPad()) {
+ if (SMBB->isEHPad()) {
BB->removeSuccessor(SMBB);
MBBLPads.push_back(SMBB);
}
}
- BB->addSuccessor(DispatchBB);
+ BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+ BB->normalizeSuccProbs();
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
@@ -7157,7 +7439,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// landing pad now.
for (SmallVectorImpl<MachineBasicBlock*>::iterator
I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
- (*I)->setIsLandingPad(false);
+ (*I)->setIsEHPad(false);
// The instruction is gone now.
MI->eraseFromParent();
@@ -7280,8 +7562,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Otherwise, we will generate unrolled scalar copies.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned src = MI->getOperand(1).getReg();
@@ -7574,6 +7855,32 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+ MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
+ MF->push_back(ContBB);
+ ContBB->splice(ContBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ MBB->addSuccessor(ContBB);
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
+ MBB->addSuccessor(TrapBB);
+
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
+ .addReg(MI->getOperand(0).getReg())
+ .addMBB(TrapBB);
+
+ MI->eraseFromParent();
+ return ContBB;
+}
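
Semantically the two blocks built here amount to the following guard (a scalar sketch; the trap models the udf #249 in TrapBB and the fall-through models ContBB):

#include <cstdint>

static int32_t CheckedSDiv(int32_t Num, int32_t Den) {
  if (Den == 0)       // the tCBZ on the divisor
    __builtin_trap(); // TrapBB: udf #249 (GCC/Clang builtin used as the trap)
  return Num / Den;   // ContBB: proceed to the division libcall
}
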
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -7643,8 +7950,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -7741,6 +8047,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return BB;
+
+ case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
@@ -7759,8 +8068,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
// SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator BBI = BB;
- ++BBI;
+ MachineFunction::iterator BBI = ++BB->getIterator();
MachineFunction *Fn = BB->getParent();
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
@@ -7824,11 +8132,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitStructByval(MI, BB);
case ARM::WIN__CHKSTK:
return EmitLowered__chkstk(MI, BB);
+ case ARM::WIN__DBZCHK:
+ return EmitLowered__dbzchk(MI, BB);
+ }
+}
+
+/// \brief Attaches vregs to MEMCPY that it will use as scratch registers when
+/// it is expanded into LDM/STM. This is done as a post-isel lowering instead
+/// of as a custom inserter because we need the use list from the SDNode.
+static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
+ MachineInstr *MI, const SDNode *Node) {
+ bool isThumb1 = Subtarget->isThumb1Only();
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstrBuilder MIB(*MF, MI);
+
+ // If the new dst/src is unused mark it as dead.
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->getOperand(0).setIsDead(true);
+ }
+ if (!Node->hasAnyUseOfValue(1)) {
+ MI->getOperand(1).setIsDead(true);
+ }
+
+ // The MEMCPY both defines and kills the scratch registers.
+ for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+ unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
+ MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ attachMEMCPYScratchRegs(Subtarget, MI, Node);
+ return;
+ }
+
const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7898,10 +8241,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- if (!C)
- return false;
- return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+ return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
}
// Return true if N is conditionally 0 or all ones.
@@ -8723,12 +9063,88 @@ static SDValue PerformXORCombine(SDNode *N,
return SDValue();
}
-/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
-/// the bits being cleared by the AND are not demanded by the BFI.
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and
+// return it, and fill in FromMask and ToMask with (consecutive) bits in "from"
+// to be extracted and their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+ assert(N->getOpcode() == ARMISD::BFI);
+
+ SDValue From = N->getOperand(1);
+ ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+ FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
+
+ // If the Base came from a SHR #C, we can deduce that it is really testing bit
+ // #C in the base of the SHR.
+ if (From->getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(From->getOperand(1))) {
+ APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+ assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+ FromMask <<= Shift.getLimitedValue(31);
+ From = From->getOperand(0);
+ }
+
+ return From;
+}
+
+// If A and B each contain one contiguous set of bits, does A | B equal the
+// concatenation A . B?
+//
+// Neither A nor B may be zero.
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+ unsigned LastActiveBitInA = A.countTrailingZeros();
+ unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+ return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
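
Working one case by hand (values assumed): for A = 0b1100 and B = 0b0011, countTrailingZeros(A) = 2 and B's highest set bit is 1, so 2 - 1 == 1 holds and A | B is the single contiguous run 0b1111. A standalone 32-bit model:

#include <cassert>
#include <cstdint>

// Model of BitsProperlyConcatenate for nonzero 32-bit values: A's lowest set
// bit must sit directly above B's highest set bit (GCC/Clang builtins used).
static bool ConcatenatesProperly(uint32_t A, uint32_t B) {
  unsigned LastActiveBitInA = __builtin_ctz(A);       // lowest set bit of A
  unsigned FirstActiveBitInB = 31 - __builtin_clz(B); // highest set bit of B
  return LastActiveBitInA - 1 == FirstActiveBitInB;
}

int main() {
  assert(ConcatenatesProperly(0b1100, 0b0011));  // 1100 | 0011 == 1111
  assert(!ConcatenatesProperly(0b1100, 0b0001)); // gap at bit 1
  assert(!ConcatenatesProperly(0b0011, 0b1100)); // B above A fails this order
}
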
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it
+  // can combine with, if one exists.
+ APInt ToMask, FromMask;
+ SDValue From = ParseBFI(N, ToMask, FromMask);
+ SDValue To = N->getOperand(0);
+
+ // Now check for a compatible BFI to merge with. We can pass through BFIs that
+ // aren't compatible, but not if they set the same bit in their destination as
+ // we do (or that of any BFI we're going to combine with).
+ SDValue V = To;
+ APInt CombinedToMask = ToMask;
+ while (V.getOpcode() == ARMISD::BFI) {
+ APInt NewToMask, NewFromMask;
+ SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+ if (NewFrom != From) {
+ // This BFI has a different base. Keep going.
+ CombinedToMask |= NewToMask;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ // Do the written bits conflict with any we've seen so far?
+ if ((NewToMask & CombinedToMask).getBoolValue())
+ // Conflicting bits - bail out because going further is unsafe.
+ return SDValue();
+
+ // Are the new bits contiguous when combined with the old bits?
+ if (BitsProperlyConcatenate(ToMask, NewToMask) &&
+ BitsProperlyConcatenate(FromMask, NewFromMask))
+ return V;
+ if (BitsProperlyConcatenate(NewToMask, ToMask) &&
+ BitsProperlyConcatenate(NewFromMask, FromMask))
+ return V;
+
+ // We've seen a write to some bits, so track it.
+ CombinedToMask |= NewToMask;
+ // Keep going...
+ V = V.getOperand(0);
+ }
+
+ return SDValue();
+}
+
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::AND) {
+ // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+ // the bits being cleared by the AND are not demanded by the BFI.
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
@@ -8744,6 +9160,38 @@ static SDValue PerformBFICombine(SDNode *N,
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
+ } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
+ // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
+ // Keep track of any consecutive bits set that all come from the same base
+ // value. We can combine these together into a single BFI.
+ SDValue CombineBFI = FindBFIToCombineWith(N);
+ if (CombineBFI == SDValue())
+ return SDValue();
+
+ // We've found a BFI.
+ APInt ToMask1, FromMask1;
+ SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
+
+ APInt ToMask2, FromMask2;
+ SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
+ assert(From1 == From2);
+ (void)From2;
+
+ // First, unlink CombineBFI.
+ DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
+ // Then create a new BFI, combining the two together.
+ APInt NewFromMask = FromMask1 | FromMask2;
+ APInt NewToMask = ToMask1 | ToMask2;
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (NewFromMask[0] == 0)
+ From1 = DCI.DAG.getNode(
+ ISD::SRL, dl, VT, From1,
+ DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
+ return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
+ DCI.DAG.getConstant(~NewToMask, dl, VT));
}
return SDValue();
}
@@ -9521,32 +9969,6 @@ static SDValue PerformSTORECombine(SDNode *N,
return SDValue();
}
-// isConstVecPow2 - Return true if each vector element is a power of 2, all
-// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
-static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
-{
- integerPart cN;
- integerPart c0 = 0;
- for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
- I != E; I++) {
- ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
- if (!C)
- return false;
-
- bool isExact;
- APFloat APF = C->getValueAPF();
- if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
- != APFloat::opOK || !isExact)
- return false;
-
- c0 = (I == 0) ? cN : c0;
- if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
- return false;
- }
- C = c0;
- return true;
-}
-
/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
@@ -9556,30 +9978,25 @@ static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
/// vcvt.s32.f32 d16, d16
/// becomes:
/// vcvt.s32.f32 d16, d16, #3
-static SDValue PerformVCVTCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op = N->getOperand(0);
+ if (!Subtarget->hasNEON())
+ return SDValue();
- if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
- Op.getOpcode() != ISD::FMUL)
+ SDValue Op = N->getOperand(0);
+ if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
return SDValue();
- uint64_t C;
- SDValue N0 = Op->getOperand(0);
SDValue ConstVec = Op->getOperand(1);
- bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
-
- if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
- !isConstVecPow2(ConstVec, isSigned, C))
+ if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
- NumLanes > 4) {
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
    // be lossy. We also can't handle more than 4 lanes, since these instructions
@@ -9587,16 +10004,22 @@ static SDValue PerformVCVTCombine(SDNode *N,
return SDValue();
}
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
SDLoc dl(N);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
- NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
- DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
- N0,
- DAG.getConstant(Log2_64(C), dl, MVT::i32));
+ SDValue FixConv = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+ DAG.getConstant(C, dl, MVT::i32));
- if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
return FixConv;
@@ -9611,38 +10034,44 @@ static SDValue PerformVCVTCombine(SDNode *N,
/// vdiv.f32 d16, d17, d16
/// becomes:
/// vcvt.f32.s32 d16, d16, #3
-static SDValue PerformVDIVCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
-
- if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ if (!N->getValueType(0).isVector() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
- uint64_t C;
SDValue ConstVec = N->getOperand(1);
- bool isSigned = OpOpcode == ISD::SINT_TO_FP;
-
- if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
- !isConstVecPow2(ConstVec, isSigned, C))
+ if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ uint32_t IntBits = IntTy.getSizeInBits();
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from i32 to f32. We can handle
// smaller integers by generating an extra extend, but larger ones would
- // be lossy.
+    // be lossy. We also can't handle more than 4 lanes, since these instructions
+ // only support v2i32/v4i32 types.
return SDValue();
}
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
SDLoc dl(N);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
SDValue ConvInput = Op.getOperand(0);
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ if (IntBits < FloatBits)
ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
ConvInput);
@@ -9652,7 +10081,7 @@ static SDValue PerformVDIVCombine(SDNode *N,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
- ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
+ ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
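
Both combines now delegate the splat test to BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int and then reject C outside [1, 32]. The scalar condition can be sketched as follows (standalone, assumed semantics, positive constants only):

#include <cassert>
#include <cmath>

// Returns log2(F) if F is a power of two with exponent in [1, 32], else -1.
static int SplatPow2ToLog2(float F) {
  int Exp;
  float Mant = std::frexp(F, &Exp); // F == Mant * 2^Exp, Mant in [0.5, 1)
  if (Mant != 0.5f)
    return -1;                      // not a power of two
  int C = Exp - 1;                  // F == 2^C
  return (C >= 1 && C <= 32) ? C : -1;
}

int main() {
  assert(SplatPow2ToLog2(8.0f) == 3);  // vdiv/vmul by 8.0 -> vcvt ... #3
  assert(SplatPow2ToLog2(6.0f) == -1); // not a power of two
  assert(SplatPow2ToLog2(1.0f) == -1); // C == 0 is rejected, as above
}
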
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -9680,7 +10109,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
@@ -9695,12 +10124,16 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
- if (isIntrinsic)
+ if (!isIntrinsic)
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
Cnt = -Cnt;
- return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ return true;
+ }
+ return false;
}
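
The rewritten intrinsic path range-checks before negating, presumably so an out-of-range count is never flipped into an accepted one. A standalone model of the accepted encodings (illustrative):

#include <cassert>

// Right-shift immediates: the public range is [1, ElementBits] (halved when
// narrowing); NEON shift intrinsics encode that count as a negative value.
static bool IsVShiftRImmModel(long Cnt, long ElementBits, bool IsNarrow,
                              bool IsIntrinsic, long &Out) {
  long Max = IsNarrow ? ElementBits / 2 : ElementBits;
  if (!IsIntrinsic) {
    Out = Cnt;
    return Cnt >= 1 && Cnt <= Max;
  }
  if (Cnt >= -Max && Cnt <= -1) {
    Out = -Cnt; // normalize to the positive encoding
    return true;
  }
  return false;
}

int main() {
  long Out;
  assert(IsVShiftRImmModel(-8, 8, false, true, Out) && Out == 8);
  assert(!IsVShiftRImmModel(8, 8, false, true, Out)); // intrinsic counts are negative
}
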
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
@@ -9939,89 +10372,123 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
-static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- // If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
- // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
- // a NaN; only do the transformation when it matches that behavior.
-
- // For now only do this when using NEON for FP operations; if using VFP, it
- // is not obvious that the benefit outweighs the cost of switching to the
- // NEON pipeline.
- if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
- N->getValueType(0) != MVT::f32)
+static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
+ APInt &KnownOne) {
+ if (Op.getOpcode() == ARMISD::BFI) {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ APInt Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
+ if (Op.getOpcode() == ARMISD::CMOV) {
+ APInt KZ2(KnownZero.getBitWidth(), 0);
+ APInt KO2(KnownOne.getBitWidth(), 0);
+ computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
+ computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+
+ KnownZero &= KZ2;
+ KnownOne &= KO2;
+ return;
+ }
+ return DAG.computeKnownBits(Op, KnownZero, KnownOne);
+}
+
+SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV,
+                                                   SelectionDAG &DAG) const {
+ // If we have a CMOV, OR and AND combination such as:
+ // if (x & CN)
+ // y |= CM;
+ //
+ // And:
+ // * CN is a single bit;
+ // * All bits covered by CM are known zero in y
+ //
+ // Then we can convert this into a sequence of BFI instructions. This will
+ // always be a win if CM is a single bit, will always be no worse than the
+ // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
+ // three bits (due to the extra IT instruction).
+
+ SDValue Op0 = CMOV->getOperand(0);
+ SDValue Op1 = CMOV->getOperand(1);
+ auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
+ auto CC = CCNode->getAPIntValue().getLimitedValue();
+ SDValue CmpZ = CMOV->getOperand(4);
+
+ // The compare must be against zero.
+ if (!isNullConstant(CmpZ->getOperand(1)))
+ return SDValue();
+
+ assert(CmpZ->getOpcode() == ARMISD::CMPZ);
+ SDValue And = CmpZ->getOperand(0);
+ if (And->getOpcode() != ISD::AND)
+ return SDValue();
+ ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
+ if (!AndC || !AndC->getAPIntValue().isPowerOf2())
return SDValue();
+ SDValue X = And->getOperand(0);
- SDValue CondLHS = N->getOperand(0);
- SDValue CondRHS = N->getOperand(1);
- SDValue LHS = N->getOperand(2);
- SDValue RHS = N->getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
- unsigned Opcode = 0;
- bool IsReversed;
- if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
- IsReversed = false; // x CC y ? x : y
- } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
- IsReversed = true ; // x CC y ? y : x
+ if (CC == ARMCC::EQ) {
+ // We're performing an "equal to zero" compare. Swap the operands so we
+ // canonicalize on a "not equal to zero" compare.
+ std::swap(Op0, Op1);
} else {
- return SDValue();
+ assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
+
+ if (Op1->getOpcode() != ISD::OR)
+ return SDValue();
- bool IsUnordered;
- switch (CC) {
- default: break;
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- // If LHS is NaN, an ordered comparison will be false and the result will
- // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
- // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- break;
- // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
- // will return -0, so vmin can only be used for unsafe math or if one of
- // the operands is known to be nonzero.
- if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- break;
- Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
- break;
+ ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
+ if (!OrC)
+ return SDValue();
+ SDValue Y = Op1->getOperand(0);
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- // If LHS is NaN, an ordered comparison will be false and the result will
- // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
- // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- break;
- // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
- // will return +0, so vmax can only be used for unsafe math or if one of
- // the operands is known to be nonzero.
- if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- break;
- Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
- break;
- }
+ if (Op0 != Y)
+ return SDValue();
+
+ // Now, is it profitable to continue?
+ APInt OrCI = OrC->getAPIntValue();
+ unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
+ if (OrCI.countPopulation() > Heuristic)
+ return SDValue();
- if (!Opcode)
+ // Lastly, can we determine that the bits defined by OrCI
+ // are zero in Y?
+ APInt KnownZero, KnownOne;
+ computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ if ((OrCI & KnownZero) != OrCI)
return SDValue();
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+
+ // OK, we can do the combine.
+ SDValue V = Y;
+ SDLoc dl(X);
+ EVT VT = X.getValueType();
+ unsigned BitInX = AndC->getAPIntValue().logBase2();
+
+ if (BitInX != 0) {
+ // We must shift X first.
+ X = DAG.getNode(ISD::SRL, dl, VT, X,
+ DAG.getConstant(BitInX, dl, VT));
+ }
+
+ for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
+ BitInY < NumActiveBits; ++BitInY) {
+ if (OrCI[BitInY] == 0)
+ continue;
+ APInt Mask(VT.getSizeInBits(), 0);
+ Mask.setBit(BitInY);
+ V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
+ // Confusingly, the operand is an *inverted* mask.
+ DAG.getConstant(~Mask, dl, VT));
+ }
+
+ return V;
}
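
A scalar model of the transformation just built, for the pattern named in the comment at the top of the function (mask values assumed: CN = 0x4, CM = 0x30):

#include <cassert>
#include <cstdint>

// Model of the single-bit ARMISD::BFI nodes created above: insert bit 0 of
// Src at position Bit of Dst (the real node takes the *inverted* mask).
static uint32_t BFIBit(uint32_t Dst, uint32_t Src, unsigned Bit) {
  uint32_t Mask = 1u << Bit;
  return (Dst & ~Mask) | ((Src & 1u) << Bit);
}

int main() {
  uint32_t X = 0x4, Y = 0x0F; // CN = 0x4 (single bit), CM = 0x30, and the
                              // bits covered by CM are known zero in Y
  uint32_t Expected = (X & 0x4) ? (Y | 0x30) : Y; // the CMOV/AND/OR pattern

  uint32_t Shifted = X >> 2; // BitInX = log2(CN) = 2
  uint32_t V = Y;
  V = BFIBit(V, Shifted, 4); // one BFI per set bit of CM
  V = BFIBit(V, Shifted, 5);
  assert(V == Expected);     // 0x0F | 0x30 == 0x3F
}
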
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
@@ -10042,6 +10509,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+ // BFI is only available on V6T2+.
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
+ SDValue R = PerformCMOVToBFICombine(N, DAG);
+ if (R)
+ return R;
+ }
+
// Simplify
// mov r1, r0
// cmp r1, x
@@ -10108,8 +10582,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
- case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::FP_TO_UINT:
+ return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+ case ISD::FDIV:
+ return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
@@ -10117,7 +10593,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
- case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
@@ -11043,37 +11518,61 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
- unsigned Opcode = Op->getOpcode();
- assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
- "Invalid opcode for Div/Rem lowering");
- bool isSigned = (Opcode == ISD::SDIVREM);
- EVT VT = Op->getValueType(0);
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
+static RTLIB::Libcall getDivRemLibcall(
+ const SDNode *N, MVT::SimpleValueType SVT) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemLibcall");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
RTLIB::Libcall LC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (SVT) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
+ return LC;
+}
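For reference, on AEABI targets the libcalls chosen here resolve to the run-time helpers named in the ARM RTABI (section 4.3.1):

//   RTLIB::SDIVREM_I32 -> __aeabi_idivmod   (quotient in r0, remainder in r1)
//   RTLIB::UDIVREM_I32 -> __aeabi_uidivmod
//   RTLIB::SDIVREM_I64 -> __aeabi_ldivmod   (quotient in r0:r1, remainder in r2:r3)
//   RTLIB::UDIVREM_I64 -> __aeabi_uldivmod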
- SDValue InChain = DAG.getEntryNode();
-
+static TargetLowering::ArgListTy getDivRemArgList(
+ const SDNode *N, LLVMContext *Context) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemArgList");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op->getOperand(i);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*Context);
+ Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
+ return Args;
+}
+
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+ "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+ VT.getSimpleVT().SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+ DAG.getContext());
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
@@ -11090,6 +11589,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
return CallInfo.first;
}
+// Lowers REM using the divmod helpers; see RTABI sections 4.2 and 4.3 for
+// the helper signatures and register conventions.
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+ // Build return types (div and rem)
+ std::vector<Type*> RetTyParams;
+ Type *RetTyElement;
+
+ switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
+ case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+ case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+ case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+ }
+
+ RetTyParams.push_back(RetTyElement);
+ RetTyParams.push_back(RetTyElement);
+ ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+ Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+ RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+ SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+ TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+ bool isSigned = N->getOpcode() == ISD::SREM;
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ // Lower call
+ CallLoweringInfo CLI(DAG);
+ CLI.setChain(InChain)
+ .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // Return the second (rem) result operand; the first holds the quotient.
+ SDNode *ResNode = CallResult.first.getNode();
+ assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+ return ResNode->getOperand(1);
+}
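Conceptually, LowerREM turns a lone remainder into a call to the paired divmod helper and keeps only the second half of the result. A C-level sketch under the RTABI 4.3.1 signatures (the wrapper name is hypothetical):

/* Illustrative only: the two-member struct comes back in r0/r1 per the RTABI. */
typedef struct { int quot; int rem; } idiv_return;
extern idiv_return __aeabi_idivmod(int numerator, int denominator);

int lowered_srem(int n, int d) {
  return __aeabi_idivmod(n, d).rem; /* second result; quot is simply discarded */
}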
+
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "unsupported target platform");
@@ -11124,8 +11664,8 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -11137,8 +11677,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
bool
@@ -11186,7 +11726,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -11212,7 +11752,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
@@ -11295,8 +11835,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
-
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11392,19 +11930,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
// guarantee, see DDI0406C ARM architecture reference manual,
// sections A8.8.72-74 LDRD)
-bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return (Size == 64) && !Subtarget->isMClass();
+ return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
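With AtomicExpansionKind::LLOnly, an i64 atomic load on an A-profile core is rewritten into a bare load-exclusive by the generic AtomicExpand pass, roughly as follows (pseudo-IR sketch; value names invented):

//   %pair = call { i32, i32 } @llvm.arm.ldrexd(i8* %p8)  ; one ldrexd is a
//   %lo   = extractvalue { i32, i32 } %pair, 0           ; 64-bit single-copy
//   %hi   = extractvalue { i32, i32 } %pair, 1           ; atomic access
//   ... %lo and %hi are then recombined into the i64 result ...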
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
return (Size <= (Subtarget->isMClass() ? 32U : 64U))
- ? AtomicRMWExpansionKind::LLSC
- : AtomicRMWExpansionKind::None;
+ ? AtomicExpansionKind::LLSC
+ : AtomicExpansionKind::None;
+}
+
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
+ return true;
}
// This has so far only been implemented for MachO.
@@ -11419,7 +11964,7 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
// Floating point values and vector values map to the same register file.
- // Therefore, althought we could do a store extract of a vector type, this is
+ // Therefore, although we could do a store extract of a vector type, this is
// better to leave at float as we have more freedom in the addressing mode for
// those.
if (VectorTy->isFPOrFPVectorTy())
@@ -11441,6 +11986,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
}
+bool ARMTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
+bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11477,6 +12030,14 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
cast<PointerType>(Addr->getType())->getElementType());
}
+void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ if (!Subtarget->hasV7Ops())
+ return;
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+}
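This hook exists because the failure path of the expanded cmpxchg loop exits after ldrex without ever executing strex, which would leave the exclusive monitor open. The expanded shape is roughly (pseudo-assembly sketch; register choices illustrative):

//   loop:    ldrex   r0, [ptr]
//            cmp     r0, rExpected
//            bne     nostore          // compare failed: no strex executed
//            strex   r1, rNew, [ptr]
//            cmp     r1, #0
//            bne     loop
//            b       done
//   nostore: clrex                    // emitted by the hook above (v7+)
//   done:    ...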
+
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
@@ -11534,12 +12095,12 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
- // support i64/f64 element).
- if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON, if the vector type is illegal, or if it has
+ // i64/f64 elements (vldN does not support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
return false;
// A pointer vector can not be the return type of the ldN intrinsics. Need to
@@ -11552,9 +12113,6 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
-
IRBuilder<> Builder(LI);
SmallVector<Value *, 2> Ops;
@@ -11562,6 +12120,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ Type *Tys[] = { VecTy, Int8Ptr };
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
// Replace uses of each shufflevector with the corresponding vector loaded
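For Factor == 2 the rewrite looks like the following (pseudo-IR sketch; note that after this change the intrinsic is overloaded on both the result vector type and the i8* pointer type):

//  Before: one wide load feeding two de-interleaving shuffles.
//    %wide = load <8 x i32>, <8 x i32>* %ptr
//    %v0   = shufflevector <8 x i32> %wide, <8 x i32> undef, <0,2,4,6>
//    %v1   = shufflevector <8 x i32> %wide, <8 x i32> undef, <1,3,5,7>
//  After: a single structured NEON load.
//    %p8   = bitcast <8 x i32>* %ptr to i8*
//    %vldN = call { <4 x i32>, <4 x i32> }
//            @llvm.arm.neon.vld2.v4i32.p0i8(i8* %p8, i32 %align)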
@@ -11624,12 +12185,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip illegal sub vector types and vector types of i64/f64 element (vstN
- // doesn't support i64/f64 element).
- if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON, if the sub-vector type is illegal, or if it
+ // has i64/f64 elements (vstN does not support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
+ EltIs64Bits)
return false;
Value *Op0 = SVI->getOperand(0);
@@ -11650,17 +12212,18 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, NumSubElts);
}
- static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
- Intrinsic::arm_neon_vst3,
- Intrinsic::arm_neon_vst4};
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], SubVecTy);
-
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
SmallVector<Value *, 6> Ops;
Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ Type *Tys[] = { Int8Ptr, SubVecTy };
+ Function *VstNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), StoreInts[Factor - 2], Tys);
+
// Split the shufflevector operands into sub vectors for the new vstN call.
for (unsigned i = 0; i < Factor; i++)
Ops.push_back(Builder.CreateShuffleVector(
@@ -11681,14 +12244,14 @@ enum HABaseType {
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
uint64_t &Members) {
- if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (auto *ST = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0; i < ST->getNumElements(); ++i) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
return false;
Members += SubMembers;
}
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
return false;
@@ -11703,7 +12266,7 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
return false;
Members = 1;
Base = HA_DOUBLE;
- } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
Members = 1;
switch (Base) {
case HA_FLOAT:
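A few C-level examples of the classification (illustrative; type names invented):

struct TwoFloats { float x, y; };        // Base = HA_FLOAT,  Members = 2: an HA
struct FourDbls  { double d[4]; };       // Base = HA_DOUBLE, Members = 4: an HA
struct Mixed     { float x; double y; }; // base types differ: not an HA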
@@ -11747,3 +12310,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
return IsHA || IsIntArray;
}
+
+unsigned ARMTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
+}
+
+unsigned ARMTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index efc9020..b764624 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -63,8 +63,6 @@ namespace llvm {
BCC_i64,
- RBIT, // ARM bitreverse instruction
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -79,6 +77,7 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
TC_RETURN, // Tail call return pseudo.
@@ -91,6 +90,7 @@ namespace llvm {
PRELOAD, // Preload
WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+ WIN__DBZCHK, // Windows' divide by zero check
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
@@ -172,12 +172,6 @@ namespace llvm {
// BUILD_VECTOR for this purpose.
BUILD_VECTOR,
- // Floating-point max and min:
- FMAX,
- FMIN,
- VMAXNM,
- VMINNM,
-
// Bit-field insert
BFI,
@@ -189,6 +183,10 @@ namespace llvm {
// Vector bitwise select
VBSL,
+ // Pseudo-instruction representing a memory copy using ldm/stm
+ // instructions.
+ MEMCPY,
+
// Vector load N-element structure to all lanes:
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
VLD3DUP,
@@ -260,6 +258,7 @@ namespace llvm {
SDNode *Node) const override;
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
@@ -348,6 +347,8 @@ namespace llvm {
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
else if (ConstraintCode.size() == 2) {
if (ConstraintCode[0] == 'U') {
switch(ConstraintCode[1]) {
@@ -420,13 +421,24 @@ namespace llvm {
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
- bool hasLoadLinkedStoreConditional() const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
bool IsStore, bool IsLoad) const override;
Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
@@ -441,16 +453,21 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -496,6 +513,7 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -508,7 +526,6 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
- SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -526,6 +543,12 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const;
+ SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SDValue &Chain) const;
+ SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -635,6 +658,8 @@ namespace llvm {
MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 84f95be..cf973d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -51,7 +51,8 @@ void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
- default: break;
+ default:
+ break;
case ARM::LDR_PRE_IMM:
case ARM::LDR_PRE_REG:
case ARM::LDR_POST_IMM:
@@ -124,82 +125,10 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
MIB.addMemOperand(MMO);
MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
AddDefaultPred(MIB);
}
-
-namespace {
- /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
- /// global base register for ARM ELF.
- struct ARMCGBR : public MachineFunctionPass {
- static char ID;
- ARMCGBR() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->getGlobalBaseReg() == 0)
- return false;
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(MF.getSubtarget());
- // Don't do this for Thumb1.
- if (STI.isThumb1Only())
- return false;
-
- const TargetMachine &TM = MF.getTarget();
- if (TM.getRelocationModel() != Reloc::PIC_)
- return false;
-
- LLVMContext *Context = &MF.getFunction()->getContext();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = STI.isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
- *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
-
- unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
- Type::getInt32PtrTy(*Context));
- unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
-
- MachineBasicBlock &FirstMBB = MF.front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- unsigned TempReg =
- MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
- unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
- TII.get(Opc), TempReg)
- .addConstantPoolIndex(Idx);
- if (Opc == ARM::LDRcp)
- MIB.addImm(0);
- AddDefaultPred(MIB);
-
- // Fix the GOT address by adding pc.
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
- MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
- .addReg(TempReg)
- .addImm(ARMPCLabelIndex);
- if (Opc == ARM::PICADD)
- AddDefaultPred(MIB);
-
- return true;
- }
-
- const char *getPassName() const override {
- return "ARM PIC Global Base Reg Initialization";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-char ARMCGBR::ID = 0;
-FunctionPass*
-llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 9f5bde3..b9de83b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -59,6 +59,7 @@ def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -70,8 +71,11 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
-def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
@@ -163,21 +167,23 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
+ SDT_ARMEH_SJLJ_SetupDispatch,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
-def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
-def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
-def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
@@ -209,6 +215,8 @@ def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -228,7 +236,9 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16","half-float">;
+ AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16","full half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -236,9 +246,8 @@ def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
-def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
- AssemblerPredicate<"FeatureDSPThumb2",
- "thumb2-dsp">;
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
+ AssemblerPredicate<"FeatureDSP", "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate<"FeatureDB",
"data-barriers">;
@@ -2322,6 +2331,7 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
}
+def : MnemonicAlias<"smi", "smc">;
// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
@@ -3671,10 +3681,10 @@ def USAT16 : AI<(outs GPRnopc:$Rd),
let Inst{3-0} = Rn;
}
-def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
- (SSAT imm:$pos, GPRnopc:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
- (USAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, 0)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -4186,7 +4196,7 @@ def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rbit", "\t$Rd, $Rm",
- [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ [(set GPR:$Rd, (bitreverse GPR:$Rm))]>,
Requires<[IsARM, HasV6T2]>,
Sched<[WriteALU]>;
@@ -4578,6 +4588,19 @@ let usesCustomInserter = 1 in {
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
}
+let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in {
+ // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs...
+ // Copies N registers worth of memory from address %src to address %dst
+ // and returns the incremented addresses.  N scratch registers will be
+ // attached for the copy to use.
+ def MEMCPY : PseudoInst<
+ (outs GPR:$newdst, GPR:$newsrc),
+ (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops),
+ NoItinerary,
+ [(set GPR:$newdst, GPR:$newsrc,
+ (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
+}
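The post-ISel hook later expands the pseudo into an ldm/stm pair with base-register writeback, roughly as below (scratch register choice illustrative):

//   ldm r1!, {r4, r5, r6, r7}   // load 4 words, r1 post-incremented -> $newsrc
//   stm r0!, {r4, r5, r6, r7}   // store them,   r0 post-incremented -> $newdst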
+
def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
@@ -4705,7 +4728,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
[(int_arm_clrex)]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6K]> {
let Inst{31-0} = 0b11110101011111111111000000011111;
}
@@ -5242,6 +5265,12 @@ def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+let usesCustomInserter = 1, Defs = [CPSR] in
+ def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
+ [(win__dbzchk GPR:$divisor)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -5301,6 +5330,10 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
Requires<[IsARM]>;
}
+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary,
+ [(ARMeh_sjlj_setup_dispatch)]>;
+
// eh.sjlj.dispatchsetup pseudo-instruction.
// This pseudo is used for both ARM and Thumb. Any differences are handled when
// the pseudo is expanded (which happens before any passes that need the
@@ -5622,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
(MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+ (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
- (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+ (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+ (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
- (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+ (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, mod_imm_neg" -> sub
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index f035d61..7020ffb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -587,11 +587,6 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
-def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>;
-def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
-def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
-
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -2465,17 +2460,17 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as above, but not predicated.
-class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
-class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -3255,6 +3250,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
[(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
+ def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
@@ -3275,6 +3277,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
[(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
+ def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
}
@@ -4110,6 +4119,12 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
+def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+ v4f16, v4f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+ v8f16, v8f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
@@ -4165,10 +4180,21 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
+def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+ v4f16, v4f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+ v8f16, v8f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
+def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+ v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
@@ -4277,6 +4303,12 @@ def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+ v8f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4285,6 +4317,12 @@ def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
v4f32, v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+ v8f16, v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -4495,6 +4533,12 @@ def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4503,6 +4547,12 @@ def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
v4f32, v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+ v8f16, v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -4570,6 +4620,13 @@ def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+ v8f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
@@ -4578,6 +4635,12 @@ def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4602,6 +4665,12 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
+def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+ v4f16, v4f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+ v8f16, v8f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
@@ -4646,6 +4715,12 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
@@ -4660,6 +4735,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
@@ -4677,6 +4758,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
@@ -4686,36 +4773,68 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
"f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
"f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
"f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
- (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
- (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
- (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
- (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
- (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
- (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
- (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
- (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}
// Vector Bitwise Operations.
@@ -5007,6 +5126,12 @@ def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
@@ -5014,6 +5139,29 @@ defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+def abd_shr :
+ PatFrag<(ops node:$in1, node:$in2, node:$shift),
+ (NEONvshrs (sub (zext node:$in1),
+ (zext node:$in2)), (i32 $shift))>;
+
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
+ (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
+ (zext (v8i8 DPR:$opB))),
+ (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
+ (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
+ (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
+ (zext (v4i16 DPR:$opB))),
+ (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
+ (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
+ (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
+ (zext (v2i32 DPR:$opB))),
+ (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
+ (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
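These patterns recognize the branch-free absolute-difference idiom so it can be selected as a single VABDLu. In scalar form (32-bit model; variable names invented):

//   int32_t  d   = (int32_t)a - (int32_t)b;     // sub(zext, zext)
//   int32_t  s   = d >> 31;                     // abd_shr: all-ones iff d < 0
//   uint32_t abd = (uint32_t)((d + s) ^ s);     // == |a - b| -> VABDLu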
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "s", int_arm_neon_vabds, add>;
@@ -5031,53 +5179,85 @@ defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmax", "s", int_arm_neon_vmaxs, 1>;
+ "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmax", "u", int_arm_neon_vmaxu, 1>;
+ "vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
- v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+ v2f32, v2f32, fmaxnan, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
- v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+ v4f32, v4f32, fmaxnan, 1>;
+def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f16",
+ v4f16, v4f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f16",
+ v8f16, v8f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+ v2f32, v2f32, fmaxnum, 1>,
Requires<[HasV8, HasNEON]>;
- def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+ v4f32, v4f32, fmaxnum, 1>,
Requires<[HasV8, HasNEON]>;
+ def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v4f16, v4f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v8f16, v8f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmin", "s", int_arm_neon_vmins, 1>;
+ "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmin", "u", int_arm_neon_vminu, 1>;
+ "vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
- v2f32, v2f32, int_arm_neon_vmins, 1>;
+ v2f32, v2f32, fminnan, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
- v4f32, v4f32, int_arm_neon_vmins, 1>;
+ v4f32, v4f32, fminnan, 1>;
+def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f16",
+ v4f16, v4f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f16",
+ v8f16, v8f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
- v2f32, v2f32, int_arm_neon_vminnm, 1>,
+ v2f32, v2f32, fminnum, 1>,
Requires<[HasV8, HasNEON]>;
- def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
- v4f32, v4f32, int_arm_neon_vminnm, 1>,
+ v4f32, v4f32, fminnum, 1>,
Requires<[HasV8, HasNEON]>;
+ def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v4f16, v4f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v8f16, v8f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// Vector Pairwise Operations.
@@ -5095,6 +5275,10 @@ def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
IIC_VPBIND, "vpadd", "f32",
v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f16",
+ v4f16, v4f16, int_arm_neon_vpadd, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -5123,6 +5307,9 @@ def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
@@ -5139,6 +5326,9 @@ def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -5155,6 +5345,14 @@ def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
IIC_VUNAQ, "vrecpe", "f32",
v4f32, v4f32, int_arm_neon_vrecpe>;
+def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f16",
+ v4f16, v4f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f16",
+ v8f16, v8f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
@@ -5163,6 +5361,14 @@ def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrecps", "f32",
v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f16",
+ v4f16, v4f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f16",
+ v8f16, v8f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
@@ -5177,6 +5383,14 @@ def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
+def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
@@ -5185,6 +5399,14 @@ def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrsqrts", "f32",
v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// Vector Shifts.
@@ -5336,6 +5558,14 @@ def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
+def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v4f16, v4f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v8f16, v8f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
(v2i32 (bitconvert (v8i8 (add DPR:$src,
@@ -5398,6 +5628,16 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
"vneg", "f32", "$Vd, $Vm", "",
[(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -5868,18 +6108,56 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
+def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v4i16, v4f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v4i16, v4f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v4f16, v4i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v4f16, v4i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v8i16, v8f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v8i16, v8f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v8f16, v8i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v8f16, v8i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
// VCVT{A, N, P, M}
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
SDPatternOperator IntU> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
- def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
- def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
- def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v4i16, v4f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v8i16, v8f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v4i16, v4f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v8i16, v8f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
}
@@ -5898,6 +6176,16 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
}
let DecoderMethod = "DecodeVCVTQ" in {
@@ -5909,6 +6197,16 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
}
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
@@ -5929,6 +6227,24 @@ def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
(VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+ (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+ (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+ (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+ (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+ (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+ (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+ (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+ (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -6182,22 +6498,40 @@ def VTBX4Pseudo
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+ def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
- def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+ def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
+ def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v4f16, v4f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v8f16, v8f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
}
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
- (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+ (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+ }
}
defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
@@ -6343,8 +6677,8 @@ def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPat<NEONfmax, VMAXfd>;
-def : N3VSPat<NEONfmin, VMINfd>;
+def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
@@ -7704,6 +8038,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
(VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
(VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+ (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
(VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7719,6 +8056,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
(VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
(VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+ (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
@@ -7736,6 +8076,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
(VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
(VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+ (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
(VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7751,6 +8094,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
(VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
(VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
+ (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 40414da..df6f243 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -591,6 +591,34 @@ def tTRAP : TI<(outs), (ins), IIC_Br,
// Load Store Instructions.
//
+// PC-relative loads must be matched first, as constant pool accesses must
+// always be PC-relative. We do this using AddedComplexity, because the
+// pattern is simpler than those of the other load instructions.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// SP-relative loads should be matched before standard immediate-offset loads,
+// since this avoids having to move SP to another register.
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
// Loads: reg/reg and reg/imm5
let canFoldAsLoad = 1, isReMaterializable = 1 in
multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -598,16 +626,20 @@ multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
AddrMode am, InstrItinClass itin_r,
InstrItinClass itin_i, string asm,
PatFrag opnode> {
- def r : // reg/reg
- T1pILdStEncode<reg_opc,
- (outs tGPR:$Rt), (ins AddrMode_r:$addr),
- am, itin_r, asm, "\t$Rt, $addr",
- [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+  // Immediate-offset loads should be matched before register-offset loads:
+  // when the offset is a constant, it is simpler to first check whether it
+  // fits in the immediate offset field and fall back to register-offset if
+  // it does not.
def i : // reg/imm5
T1pILdStEncodeImm<imm_opc, 1 /* Load */,
(outs tGPR:$Rt), (ins AddrMode_i:$addr),
am, itin_i, asm, "\t$Rt, $addr",
[(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+ // Register-offset loads are matched last.
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
}
// Stores: reg/reg and reg/imm5
multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -615,32 +647,32 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
AddrMode am, InstrItinClass itin_r,
InstrItinClass itin_i, string asm,
PatFrag opnode> {
- def r : // reg/reg
- T1pILdStEncode<reg_opc,
- (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
- am, itin_r, asm, "\t$Rt, $addr",
- [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
def i : // reg/imm5
T1pILdStEncodeImm<imm_opc, 0 /* Store */,
(outs), (ins tGPR:$Rt, AddrMode_i:$addr),
am, itin_i, asm, "\t$Rt, $addr",
[(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
}
// A8.6.57 & A8.6.60
-defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iLoad_r, IIC_iLoad_i, "ldr",
UnOpFrag<(load node:$Src)>>;
// A8.6.64 & A8.6.61
-defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
UnOpFrag<(zextloadi8 node:$Src)>>;
// A8.6.76 & A8.6.73
-defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
UnOpFrag<(zextloadi16 node:$Src)>>;
@@ -659,58 +691,36 @@ def tLDRSH : // A8.6.84
"ldrsh", "\t$Rt, $addr",
[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
-let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
- T1LdStSP<{1,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
- T1Encoding<{0,1,0,0,1,?}> {
- // A6.2 & A8.6.59
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
+ let Inst{7-0} = addr;
}
// A8.6.194 & A8.6.192
-defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iStore_r, IIC_iStore_i, "str",
BinOpFrag<(store node:$LHS, node:$RHS)>>;
// A8.6.197 & A8.6.195
-defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// A8.6.207 & A8.6.205
-defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
-def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
- "str", "\t$Rt, $addr",
- [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
- T1LdStSP<{0,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -730,6 +740,7 @@ def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
// Writeback version is just a pseudo, as there's no encoding difference.
// Writeback happens iff the base register is not in the destination register
// list.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def tLDMIA_UPD :
InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
"$Rn = $wb", IIC_iLoad_mu>,
@@ -1328,16 +1339,16 @@ def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
(tSUBrr tGPR:$lhs, tGPR:$rhs)>;
// Bswap 16 with load/store
-def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
- (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
-def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
- t_addrmode_rrs2:$addr),
- (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
+ (tREV16 (tLDRHr t_addrmode_rr:$addr))>;
def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
t_addrmode_is2:$addr),
              (tSTRHi (tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_rr:$addr),
+ (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>;
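The four patterns above fold a 16-bit byte swap into a halfword load or store plus REV16. As a minimal C++ sketch of the scalar identity being matched (the helper name is hypothetical, not part of the patch):

#include <cstdint>

// (srl (bswap (extloadi16 addr)), 16): bswap32 moves the two loaded
// bytes, swapped, into the top half; the logical shift right by 16
// brings them back down and discards the (undefined) extension bytes.
uint32_t bswap16_via_bswap32(uint16_t v) {
  uint32_t x = __builtin_bswap32(static_cast<uint32_t>(v));
  return x >> 16; // equals __builtin_bswap16(v); REV16 of the
                  // zero-extended load yields the same low halfword
}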
// ConstantPool
def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
@@ -1372,10 +1383,10 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
- (tLDRBr t_addrmode_rrs1:$addr)>;
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rr:$addr),
+ (tLDRBr t_addrmode_rr:$addr)>;
// extload from the stack -> word load from the stack, as it avoids having to
// materialize the base in a separate register. This only works when a word
@@ -1389,61 +1400,61 @@ def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
Requires<[IsThumb, IsThumb1Only, IsLE]>;
// extload -> zextload
-def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// If it's impossible to use [r,r] address mode for sextload, select to
// ldr{b|h} + sxt{b|h} instead.
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
(tSXTB (tLDRBi t_addrmode_is1:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
- (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tSXTB (tLDRBr t_addrmode_rr:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tSXTH (tLDRHi t_addrmode_is2:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
- (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tSXTH (tLDRHr t_addrmode_rr:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
- (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
(tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
-def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
- (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>;
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>;
def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
(tLDRBi t_addrmode_is1:$src)>;
-def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
- (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rr:$src),
+ (tLDRBr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
(tLDRHi t_addrmode_is2:$src)>;
-def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
- (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rr:$src),
+ (tLDRHr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
(tLDRi t_addrmode_is4:$src)>;
-def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
- (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rr:$src),
+ (tLDRr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
(tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
-def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
- (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>;
def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
(tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
-def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
- (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>;
def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
(tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
-def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
- (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>;
// Large immediate handling.
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index aba8a7b..d460d33 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -43,7 +43,7 @@ def t2_shift_imm : Operand<i32> {
// Shifted operands. No register controlled shifts for Thumb2.
// Note: We do not support rrx shifted operands yet.
def t2_so_reg : Operand<i32>, // reg imm
- ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
[shl,srl,sra,rotr]> {
let EncoderMethod = "getT2SORegOpValue";
let PrintMethod = "printT2SOOperand";
@@ -1554,19 +1554,21 @@ def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
-// For disassembly only.
+let mayLoad = 1 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
+let mayLoad = 1 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>;
+let mayStore = 1 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
@@ -1574,6 +1576,7 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
+let mayStore = 1 in
def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
@@ -2100,7 +2103,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
let Inst{23} = 0b1;
@@ -2117,7 +2120,7 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
dag iops = (ins rGPR:$Rn, rGPR:$Rm),
string asm = "\t$Rd, $Rn, $Rm">
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -2215,13 +2218,13 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
"usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
class T2SatI<dag oops, dag iops, InstrItinClass itin,
@@ -2254,7 +2257,7 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@@ -2278,7 +2281,7 @@ def t2USAT: T2SatI<
def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
"usat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;
@@ -2288,8 +2291,8 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
let Inst{5-4} = 0b00;
}
-def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
-def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
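The tightened operands above restrict the saturate position to the encodable ranges (1-32 for SSAT, 0-31 for USAT). A scalar model of the SSAT semantics those immediates select, assuming the standard ARM definition (helper name hypothetical):

#include <algorithm>
#include <cstdint>

// Saturate x to a signed `pos`-bit value, pos in [1, 32]; USAT is the
// analogous clamp to [0, 2^pos - 1] with pos in [0, 31].
int32_t ssat_model(int32_t x, unsigned pos) {
  int64_t hi = (int64_t(1) << (pos - 1)) - 1;
  int64_t lo = -(int64_t(1) << (pos - 1));
  return int32_t(std::min<int64_t>(std::max<int64_t>(x, lo), hi));
}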
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -2605,7 +2608,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
// Rounding variants of the below included for disassembly only
@@ -2614,7 +2617,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmul", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2624,7 +2627,7 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2636,7 +2639,7 @@ def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2646,7 +2649,7 @@ def t2SMMLA : T2FourReg<
def t2SMMLAR: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2657,7 +2660,7 @@ def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2667,7 +2670,7 @@ def t2SMMLS: T2FourReg<
def t2SMMLSR:T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2679,7 +2682,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2692,7 +2695,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2705,7 +2708,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2718,7 +2721,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2730,7 +2733,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
[]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2742,7 +2745,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
[]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2760,7 +2763,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2773,7 +2776,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2786,7 +2789,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2799,7 +2802,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2811,7 +2814,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2823,7 +2826,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2839,79 +2842,79 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
def t2SMUAD: T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUADX:T2ThreeReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSD: T2ThreeReg_mac<
0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSDX:T2ThreeReg_mac<
0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMLAD : T2FourReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLADX : T2FourReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
//===----------------------------------------------------------------------===//
// Division Instructions.
@@ -2961,7 +2964,7 @@ def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rbit", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>,
+ [(set rGPR:$Rd, (bitreverse rGPR:$Rm))]>,
Sched<[WriteALU]>;
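The pattern change above maps t2RBIT to the generic bitreverse node instead of the target-specific ARMrbit. For reference, a minimal C++ model of the 32-bit bit reversal RBIT performs (a sketch of the semantics, not the LLVM lowering):

#include <cstdint>

// Reverse the bit order of a 32-bit value: bit 0 -> bit 31, and so on.
uint32_t rbit_model(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | (x & 1);
    x >>= 1;
  }
  return r;
}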
def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e83f8c8..050cd1a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -20,7 +20,6 @@ def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
-
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -93,7 +92,7 @@ def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
- [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
@@ -107,7 +106,7 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
- [(store SPR:$Sd, addrmode5:$addr)]> {
+ [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
@@ -393,8 +392,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
}
}
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -541,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -922,6 +925,22 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let isRegSequence = 1;
}
+// Hoist an fabs or a fneg of a value coming from integer registers
+// and do the fabs/fneg on the integer value. This is never a loss
+// and could enable the conversion to float to be removed completely.
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsARM]>;
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsThumb2]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsARM]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsThumb2]>;
+
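The four patterns above implement the hoisting described in the comment: for an f64 built from two GPRs, fabs clears bit 31 of the high word (the double's sign bit, which BFC clears) and fneg flips it (which EOR does). A minimal C++ sketch of the bit manipulation, with hypothetical helper names:

#include <cstdint>

// The high word of an IEEE-754 double holds the sign in bit 31.
uint32_t fabs_high_word(uint32_t hi) { return hi & 0x7FFFFFFFu; } // clear bit 31
uint32_t fneg_high_word(uint32_t hi) { return hi ^ 0x80000000u; } // flip bit 31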
let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
@@ -1003,7 +1022,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(f64 (sint_to_fp GPR:$a)),
(VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOD (VLDRS addrmode5:$a))>;
}
@@ -1021,7 +1040,7 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
(VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOS (VLDRS addrmode5:$a))>;
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -1035,7 +1054,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(f64 (uint_to_fp GPR:$a)),
(VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOD (VLDRS addrmode5:$a))>;
}
@@ -1053,7 +1072,7 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
(VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOS (VLDRS addrmode5:$a))>;
// FP -> Int:
@@ -1106,7 +1125,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
- def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
}
@@ -1124,7 +1143,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+ addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1138,7 +1158,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
- def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
}
@@ -1156,7 +1176,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+ addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 265b86f..725b838 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,17 +60,24 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+namespace llvm {
+void initializeARMLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
+
namespace {
 /// Post-register-allocation pass that combines load / store instructions
 /// to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
@@ -84,7 +91,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "ARM load / store optimization pass";
+ return ARM_LOAD_STORE_OPT_NAME;
}
private:
@@ -118,6 +125,7 @@ namespace {
};
SpecificBumpPtrAllocator<MergeCandidate> Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
+ SmallVector<MachineInstr*,4> MergeBaseCandidates;
void moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before);
@@ -140,12 +148,16 @@ namespace {
MachineBasicBlock::iterator &MBBI);
bool MergeBaseUpdateLoadStore(MachineInstr *MI);
bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ bool CombineMovBx(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
}
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+
static bool definesCPSR(const MachineInstr *MI) {
for (const auto &MO : MI->operands()) {
if (!MO.isReg())
@@ -619,9 +631,10 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
unsigned NewBase;
if (isi32Load(Opcode)) {
- // If it is a load, then just use one of the destination register to
- // use as the new base.
+ // If it is a load, then just use one of the destination registers
+    // as the new base; the merged instruction will not use writeback in Thumb1.
NewBase = Regs[NumRegs-1].first;
+ Writeback = false;
} else {
// Find a free register that we can use as scratch register.
moveLiveRegsBefore(MBB, InsertBefore);
@@ -725,9 +738,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
if (Writeback) {
- if (Opcode == ARM::tLDMIA)
+      assert(isThumb1 && "expected Writeback only in Thumb1");
+ if (Opcode == ARM::tLDMIA) {
+        assert(!ContainsReg(Regs, Base) && "Thumb1 can't LDM! with Base in Regs");
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
+ }
MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
@@ -784,6 +800,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 4> ImpDefs;
DenseSet<unsigned> KilledRegs;
+ DenseSet<unsigned> UsedRegs;
// Determine list of registers and list of implicit super-register defs.
for (const MachineInstr *MI : Cand.Instrs) {
const MachineOperand &MO = getLoadStoreRegOp(*MI);
@@ -792,6 +809,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
if (IsKill)
KilledRegs.insert(Reg);
Regs.push_back(std::make_pair(Reg, IsKill));
+ UsedRegs.insert(Reg);
if (IsLoad) {
// Collect any implicit defs of super-registers, after merging we can't
@@ -881,7 +899,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
for (MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.isKill())
continue;
- if (KilledRegs.count(MO.getReg()))
+ if (UsedRegs.count(MO.getReg()))
MO.setIsKill(false);
}
}
@@ -995,76 +1013,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
} while (SIndex < EIndex);
}
-static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tSUBi8:
- case ARM::t2SUBri:
- case ARM::SUBri:
- CheckCPSRDef = true;
- break;
- case ARM::tSUBspi:
- break;
- }
-
- // Make sure the offset fits in 8 bits.
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
- MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tADDi8:
- case ARM::t2ADDri:
- case ARM::ADDri:
- CheckCPSRDef = true;
- break;
- case ARM::tADDspi:
- break;
- }
-
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- // Make sure the offset fits in 8 bits.
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
- MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
ARM_AM::AMSubMode Mode) {
switch (Opc) {
@@ -1132,6 +1080,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
}
+/// Check if the given instruction increments or decrements a register and
+/// return the amount it is incremented/decremented. Returns 0 if the
+/// instruction is not such an increment/decrement, or if the CPSR flags it
+/// generates are possibly read as well.
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ bool CheckCPSRDef;
+ int Scale;
+ switch (MI.getOpcode()) {
+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
+ case ARM::t2SUBri:
+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
+ case ARM::t2ADDri:
+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+ default: return 0;
+ }
+
+ unsigned MIPredReg;
+ if (MI.getOperand(0).getReg() != Reg ||
+ MI.getOperand(1).getReg() != Reg ||
+ getInstrPredicate(&MI, MIPredReg) != Pred ||
+ MIPredReg != PredReg)
+ return 0;
+
+ if (CheckCPSRDef && definesCPSR(&MI))
+ return 0;
+ return MI.getOperand(2).getImm() * Scale;
+}
+
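A standalone sketch of the convention the function above establishes (the enum and helper are hypothetical, not the LLVM API): the sign of the result encodes add versus sub, and the Thumb1 imm8/SP forms scale their immediate by 4, mirroring the switch:

enum class Opc { tADDi8, tSUBi8, t2ADDri, ADDri, t2SUBri, SUBri,
                 tADDspi, tSUBspi, Other };

// Signed byte offset applied to the base register, or 0 if the opcode
// is not a recognized increment/decrement.
int scaledByteOffset(Opc Op, int Imm) {
  switch (Op) {
  case Opc::tADDi8: case Opc::tADDspi: return  4 * Imm; // word-scaled add
  case Opc::tSUBi8: case Opc::tSUBspi: return -4 * Imm; // word-scaled sub
  case Opc::t2ADDri: case Opc::ADDri:  return  Imm;     // byte add
  case Opc::t2SUBri: case Opc::SUBri:  return -Imm;     // byte sub
  default:                             return  0;
  }
}

Callers then compare the result against plus or minus the transfer size to decide whether the add/sub can be folded, as MergeBaseUpdateLSMultiple does below.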
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (MBBI == BeginMBBI)
+ return EndMBBI;
+
+ // Skip debug values.
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+ --PrevMBBI;
+
+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for an increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+ // Skip debug values.
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (NextMBBI == EndMBBI)
+ return EndMBBI;
+
+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
 /// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@@ -1151,7 +1168,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
const MachineOperand &BaseOP = MI->getOperand(0);
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
@@ -1163,49 +1179,24 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
if (MI->getOperand(i).getReg() == Base)
return false;
- bool DoMerge = false;
- ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
- // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- } else if (Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::da;
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(PrevMBBI);
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(NextMBBI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
+ if (Mode == ARM_AM::ia && Offset == -Bytes) {
+ Mode = ARM_AM::db;
+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
+ Mode = ARM_AM::da;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
+ return false;
}
-
- if (!DoMerge)
- return false;
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
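With those helpers the four-way DoMerge logic above collapses into a small submode table. Informally (schematic summary, not code from the patch):

  preceding SUB #Bytes :  ia -> db_UPD,  ib -> da_UPD
  trailing  ADD #Bytes :  ia -> ia_UPD,  ib -> ib_UPD
  trailing  SUB #Bytes :  da -> da_UPD,  db -> db_UPD

Any other offset, or no adjacent update at all, returns false and leaves the LDM/STM untouched.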
@@ -1283,7 +1274,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
@@ -1295,7 +1285,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- bool isLd = isLoadSingle(Opcode);
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@@ -1303,55 +1292,31 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- bool DoMerge = false;
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- unsigned NewOpc = 0;
- // AM2 - 12 bits, thumb2 - 8 bits.
- unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
-
- // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (!isAM5 &&
- isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(PrevMBBI);
- }
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if (!isAM5 &&
- isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(NextMBBI);
- }
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ unsigned NewOpc;
+ if (!isAM5 && Offset == Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (Offset == -Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (!isAM5 && Offset == -Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else
+ return false;
}
+ MBB.erase(MergeInstr);
- if (!DoMerge)
- return false;
+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+ bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -1368,18 +1333,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
}
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
@@ -1391,13 +1354,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// the vestigal zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
@@ -1409,46 +1371,75 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return true;
}
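For a single load or store the same before/after search picks between the pre- and post-indexed forms, e.g. (schematic ARM assembly, not compiler output):

  sub r1, r1, #4           ldr r0, [r1, #-4]!    ; preceding sub -> pre-decrement
  ldr r0, [r1]         =>

  ldr r0, [r1]             ldr r0, [r1], #4      ; trailing add -> post-increment
  add r1, r1, #4       =>

For AM5 (VLDR/VSTR) only the decrement-before and increment-after cases are accepted, since the VLDM/VSTM replacements can only encode the db and ia submodes.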
-/// Returns true if instruction is a memory operation that this pass is capable
-/// of operating on.
-static bool isMemoryOp(const MachineInstr *MI) {
- // When no memory operands are present, conservatively assume unaligned,
- // volatile, unfoldable.
- if (!MI->hasOneMemOperand())
+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
+ "Must have t2STRDi8 or t2LDRDi8");
+ if (MI.getOperand(3).getImm() != 0)
return false;
- const MachineMemOperand *MMO = *MI->memoperands_begin();
-
- // Don't touch volatile memory accesses - we may be changing their order.
- if (MMO->isVolatile())
+ // Behaviour for writeback is undefined if base register is the same as one
+ // of the others.
+ const MachineOperand &BaseOp = MI.getOperand(2);
+ unsigned Base = BaseOp.getReg();
+ const MachineOperand &Reg0Op = MI.getOperand(0);
+ const MachineOperand &Reg1Op = MI.getOperand(1);
+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
return false;
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
- // not.
- if (MMO->getAlignment() < 4)
- return false;
+ unsigned PredReg;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ MachineBasicBlock::iterator MBBI(MI);
+ MachineBasicBlock &MBB = *MI.getParent();
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
+ PredReg, Offset);
+ unsigned NewOpc;
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
- // str <undef> could probably be eliminated entirely, but for now we just want
- // to avoid making a mess of it.
- // FIXME: Use str <undef> as a wildcard to enable better stm folding.
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
- MI->getOperand(0).isUndef())
- return false;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)
+ .addReg(BaseOp.getReg(), RegState::Define);
+ } else {
+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
+ MIB.addReg(BaseOp.getReg(), RegState::Define)
+ .addOperand(Reg0Op).addOperand(Reg1Op);
+ }
+ MIB.addReg(BaseOp.getReg(), RegState::Kill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg);
+ assert(TII->get(Opcode).getNumOperands() == 6 &&
+ TII->get(NewOpc).getNumOperands() == 7 &&
+ "Unexpected number of operands in Opcode specification.");
- // Likewise don't mess with references to undefined addresses.
- if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
- MI->getOperand(1).isUndef())
- return false;
+ // Transfer implicit operands.
+ for (const MachineOperand &MO : MI.implicit_operands())
+ MIB.addOperand(MO);
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- unsigned Opcode = MI->getOpcode();
+ MBB.erase(MBBI);
+ return true;
+}
+
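MergeBaseUpdateLSDouble is new: t2LDRDi8/t2STRDi8 formed earlier by ARMPreAllocLoadStoreOpt never enter the LDM/STM candidate machinery, so their writeback folding is done directly. Only a zero base offset and an adjacent +/-8 update qualify, 8 bytes being exactly the two 32-bit registers transferred. Schematically (Thumb-2 assembly, not compiler output):

  ldrd r0, r1, [r2]         ldrd r0, r1, [r2], #8    ; trailing add -> post-indexed
  add  r2, r2, #8       =>

  sub  r2, r2, #8           strd r0, r1, [r2, #-8]!  ; preceding sub -> pre-indexed
  strd r0, r1, [r2]     =>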
+/// Returns true if instruction is a memory operation that this pass is capable
+/// of operating on.
+static bool isMemoryOp(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
case ARM::VLDRS:
case ARM::VSTRS:
- return MI->getOperand(1).isReg();
case ARM::VLDRD:
case ARM::VSTRD:
- return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
case ARM::tLDRi:
@@ -1459,9 +1450,40 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- return MI->getOperand(1).isReg();
+ break;
+ default:
+ return false;
}
- return false;
+ if (!MI.getOperand(1).isReg())
+ return false;
+
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO.isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO.getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI.getOperand(1).isUndef())
+ return false;
+
+ return true;
}
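The rewrite keeps the same checks but runs the cheap opcode filter first and applies the operand and memory-operand tests once, instead of repeating getOperand(1).isReg() per opcode group. The accepted shape, informally:

  1. the opcode is one of the LDR/STR, tLDR/tSTR, t2LDR/t2STR or VLDR/VSTR forms above
  2. operand 1 is a register (rejecting e.g. constant-pool addressing)
  3. there is exactly one MachineMemOperand, non-volatile and at least word-aligned
  4. neither the transferred register nor the base address is <undef>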
static void InsertLDR_STR(MachineBasicBlock &MBB,
@@ -1616,6 +1638,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned Position = 0;
assert(Candidates.size() == 0);
+ assert(MergeBaseCandidates.size() == 0);
LiveRegsValid = false;
for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@@ -1626,7 +1649,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
continue;
++Position;
- if (isMemoryOp(MBBI)) {
+ if (isMemoryOp(*MBBI)) {
unsigned Opcode = MBBI->getOpcode();
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
@@ -1694,8 +1717,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI = I;
--Position;
// Fallthrough to look into existing chain.
- } else if (MBBI->isDebugValue())
+ } else if (MBBI->isDebugValue()) {
continue;
+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+ MBBI->getOpcode() == ARM::t2STRDi8) {
+ // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
+ // remember them because we may still be able to merge add/sub into them.
+ MergeBaseCandidates.push_back(MBBI);
+ }
+
// If we are here then the chain is broken; Extract candidates for a merge.
if (MemOps.size() > 0) {
@@ -1726,7 +1756,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (Merged) {
Changed = true;
unsigned Opcode = Merged->getOpcode();
- if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+ MergeBaseUpdateLSDouble(*Merged);
+ else
MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
@@ -1741,6 +1773,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
Candidates.clear();
+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+ for (MachineInstr *MI : MergeBaseCandidates)
+ MergeBaseUpdateLSDouble(*MI);
+ MergeBaseCandidates.clear();
return Changed;
}
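The LDRD/STRD handling is thus two-phase; an informal outline of the scan above:

  // Backwards scan over the block:
  //   regular memory op     -> extend or flush the MemOps merge candidates
  //   t2LDRDi8 / t2STRDi8   -> remember in MergeBaseCandidates
  // Merge the collected candidates into LDM/STM, folding writeback as we go;
  // then retry writeback folding on every remembered LDRD/STRD.

Doubleword accesses never become LDM/STM candidates (see isMemoryOp above), so without the second phase an adjacent add/sub of their base register would survive the pass.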
@@ -1765,7 +1801,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
- MachineInstr *PrevMI = std::prev(MBBI);
+ MachineBasicBlock::iterator PrevI = std::prev(MBBI);
+ // Ignore any DBG_VALUE instructions.
+ while (PrevI->isDebugValue() && PrevI != MBB.begin())
+ --PrevI;
+ MachineInstr *PrevMI = PrevI;
unsigned Opcode = PrevMI->getOpcode();
if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
@@ -1786,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
return false;
}
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+ MBBI->getOpcode() != ARM::tBX_RET)
+ return false;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+ return false;
+
+ for (auto Use : Prev->uses())
+ if (Use.isKill()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ MBB.erase(Prev);
+ return true;
+ }
+
+ llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
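CombineMovBx is a Thumb1-only cleanup for a return sequence Thumb1 functions sometimes end with (schematic, not compiler output):

  mov lr, r3        =>    bx r3
  bx  lr

The tMOVr must define LR from a register it kills; the replacement tBX gets a default predicate and inherits the implicit operands of the old tBX_RET (e.g. the live return-value registers).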
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1793,7 +1857,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
- MRI = &Fn.getRegInfo();
+
RegClassInfoValid = false;
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1805,18 +1869,29 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
+ if (isThumb1)
+ Modified |= CombineMovBx(MBB);
}
Allocator.DestroyAll();
return Modified;
}
+namespace llvm {
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
+ "ARM pre- register allocation load / store optimization pass"
+
namespace {
/// Pre- register allocation pass that moves loads / stores from consecutive
/// locations close together to make it more likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const DataLayout *TD;
const TargetInstrInfo *TII;
@@ -1828,7 +1903,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "ARM pre- register allocation load / store optimization pass";
+ return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
private:
@@ -1847,8 +1922,11 @@ namespace {
char ARMPreAllocLoadStoreOpt::ID = 0;
}
+INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
+ ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getDataLayout();
+ TD = &Fn.getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
@@ -1856,9 +1934,8 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI)
- Modified |= RescheduleLoadStoreInstrs(MFI);
+ for (MachineBasicBlock &MFI : Fn)
+ Modified |= RescheduleLoadStoreInstrs(&MFI);
return Modified;
}
@@ -2187,7 +2264,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!MI->isDebugValue())
MI2LocMap[MI] = ++Loc;
- if (!isMemoryOp(MI))
+ if (!isMemoryOp(*MI))
continue;
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
@@ -2275,3 +2352,4 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
return new ARMPreAllocLoadStoreOpt();
return new ARMLoadStoreOpt();
}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index f5250ff..ac0330f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===//
+//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,5 +20,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
- GlobalBaseReg(0) {}
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 14dd9ef..d644797 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -52,7 +52,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned ReturnRegsCount;
/// HasStackFrame - True if this function has a stack frame. Set by
- /// processFunctionBeforeCalleeSavedScan().
+ /// determineCalleeSaves().
bool HasStackFrame;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
@@ -110,11 +110,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
/// ArgumentStackSize - amount of bytes on stack consumed by the arguments
/// being passed on the stack
unsigned ArgumentStackSize;
@@ -133,7 +128,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -204,9 +199,6 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
llvm_unreachable("Duplicate entries!");
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 45cc9ea..02cbfb1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -266,12 +266,19 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// Scalar single-precision floating-point register class.
-// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
-// avoid partial-write dependencies on D registers (S registers are
-// renamed as portions of D registers).
-def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
- (sequence "S%u", 0, 31), 2),
- (sequence "S%u", 0, 31))>;
+// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
+// to avoid partial-write dependencies on D or Q (depending on platform)
+// registers (S registers are renamed as portions of D/Q registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
+ let AltOrders = [(add (decimate SPR, 2), SPR),
+ (add (decimate SPR, 4),
+ (decimate SPR, 2),
+ (decimate (rotl SPR, 1), 4),
+ (decimate (rotl SPR, 1), 2))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
+}
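Expanding the set operators, the two alternative allocation orders are roughly (informal expansion, not TableGen output):

  order 1 (stride 2): S0, S2, ..., S30, S1, S3, ..., S31
  order 2 (stride 4): S0, S4, ..., S28, S2, S6, ..., S30,
                      S1, S5, ..., S29, S3, S7, ..., S31

AltOrderSelect returns 1 normally and 2 when useStride4VFPs() holds, so affected subtargets also avoid packing the first few allocations into the same Q register; returning 0 would keep the plain S0..S31 order.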
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
@@ -281,25 +288,29 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// class.
// ARM requires only word alignment for double. It's more performant if it
// is double-word alignment though.
-def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(sequence "D%u", 0, 31)> {
- // Allocate non-VFP2 registers D16-D31 first.
- let AltOrders = [(rotl DPR, 16)];
- let AltOrderSelect = [{ return 1; }];
+ // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
+ // Darwin platforms.
+ let AltOrders = [(rotl DPR, 16),
+ (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
}
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 16)>;
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
-def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 8)>;
// Generic 128-bit vector register class.
-def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
(sequence "Q%u", 0, 15)> {
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8)];
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
index b03d5ff..3ad7730 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -37,1050 +37,13 @@ def SW_FDIV : FuncUnit;
// FIXME: Add preload instruction when it is documented.
// FIXME: Model non-pipelined nature of FP div / sqrt unit.
-def SwiftItineraries : ProcessorItineraries<
- [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
- //
- // Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2]>,
- InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3]>,
- InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_LS]>],
- [5]>,
- //
- // MVN instructions
- InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- //
- // No operand cycles
- InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
- //
- // Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1]>,
- //
- // Bitwise Instructions that produce a result
- InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1]>,
- //
- // Unary Instructions that produce a result
-
- // CLZ, RBIT, etc.
- InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
-
- // BFC, BFI, UBFX, SBFX
- InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1]>,
-
- //
- // Zero and sign extension instructions
- InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1, 1]>,
- //
- // Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- //
- // Test instructions
- InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- //
- // Move instructions, conditional
- // FIXME: Correctly model the extra input dep on the destination.
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2]>,
-
- // Integer multiply pipeline
- //
- InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0], 3>,
- InstrStage<1, [SW_ALU0]>],
- [5, 5, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 6, 1, 1]>,
- //
- // Integer divide
- InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 0>,
- InstrStage<14, [SW_IDIV]>],
- [14, 1, 1]>,
-
- // Integer load pipeline
- // FIXME: The timings are some rough approximations
- //
- // Immediate offset
- InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3, 4, 1, 1]>,
- //
- // Scaled register offset
- InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [5, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [5, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3, 4, 1, 1]>,
- //
- // Scaled register offset with update
- InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 3, 1, 1]>,
- //
- // Load multiple, def is the 5th operand.
- // FIXME: This assumes 3 to 4 registers.
- InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1, 3], [], -1>, // dynamic uops
-
- //
- // Load multiple + update, defs are the 1st and 5th operands.
- InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1, 3], [], -1>, // dynamic uops
- //
- // Load multiple plus branch
- InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1, 3], [], -1>, // dynamic uops
- //
- // Pop, def is the 3rd operand.
- InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 3], [], -1>, // dynamic uops
- //
- // Pop + branch, def is the 3rd operand.
- InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 3], [], -1>, // dynamic uops
-
- //
- // iLoadi + iALUr for t2LDRpci_pic.
- InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [4, 1]>,
-
- // Integer store pipeline
- ///
- // Immediate offset
- InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Scaled register offset
- InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- //
- // Scaled register offset with update
- InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
- [3, 1, 1, 1]>,
- //
- // Store multiple
- InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [], [], -1>, // dynamic uops
- //
- // Store multiple + update
- InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [2], [], -1>, // dynamic uops
-
- //
- // Preload
- InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
-
- // Branch
- //
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
-
- // FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- //
- // Single-precision FP Unary
- //
- // Most floating-point moves get issued on ALU0.
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
-
- //
- // Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [1, 1]>,
- //
- // Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [1, 1]>,
- //
- // Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
-
- //
- // Single to Half FP Convert
- InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 4>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1]>,
- //
- // Half to Single FP Convert
- InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
-
- //
- // Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 1]>,
- //
- // Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [12, 1, 1]>,
- //
- // Single-precision Fused FP MAC
- InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-precision Fused FP MAC
- InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [12, 1, 1]>,
- //
- // Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<15, [SW_FDIV]>],
- [17, 1, 1]>,
- //
- // Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<30, [SW_FDIV]>],
- [32, 1, 1]>,
- //
- // Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<15, [SW_FDIV]>],
- [17, 1]>,
- //
- // Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<30, [SW_FDIV]>],
- [32, 1, 1]>,
-
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Single-precision FP Load
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // Double-precision FP Load
- InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // FP Load Multiple
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 4], [], -1>, // dynamic uops
- //
- // FP Load Multiple + update
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1, 4], [], -1>, // dynamic uops
- //
- // Single-precision FP Store
- InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // Double-precision FP Store
- InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // FP Store Multiple
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1], [], -1>, // dynamic uops
- //
- // FP Store Multiple + update
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1], [], -1>, // dynamic uops
- // NEON
- //
- // Double-register Integer Unary
- InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Quad-register Integer Unary
- InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Double-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Quad-register Integer CountQ-Unary
- InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Double-register Integer Binary
- InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Binary
- InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Subtract
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Subtract
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Shift
- InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Shift
- InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Count
- InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Count
- InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- //
- // Double-register Integer Pair Add Long
- InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Pair Add Long
- InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
-
- //
- // Move
- InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Move Immediate
- InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2]>,
- //
- // Double-register Permute Move
- InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Quad-register Permute Move
- InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Integer to Lane Move
- // FIXME: I think this is correct, but it is not clear from the tuning guide.
- InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
-
- //
- // Vector narrow move
- InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Double-register FP Unary
- // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
- // and they issue on a different pipeline.
- InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Quad-register FP Unary
- // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
- // and they issue on a different pipeline.
- InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Double-register FP Binary
- // FIXME: We're using this itin for many instructions.
- InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // VPADD, etc.
- InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register FP VMUL
- InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register FP Binary
- InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register FP VMUL
- InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Fused FP Multiple-Accumulate
- InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register FusedF P Multiple-Accumulate
- InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Reciprical Step
- InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register Reciprical Step
- InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Permute
- // FIXME: The latencies are unclear from the documentation.
- InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
- //
- // Quad-register Permute
- // FIXME: The latencies are unclear from the documentation.
- InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
- //
- // Quad-register Permute (3 cycle issue on A9)
- InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
-
- //
- // Double-register VEXT
- InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- //
- // Quad-register VEXT
- InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- //
- // VTB
- InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 3, 3]>,
- InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 3, 5, 5]>,
- InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 3, 5, 7, 7]>,
- //
- // VTBX
- InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 3, 3]>,
- InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 3, 5, 5]>,
- InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 3, 5, 7, 7]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simple machine model which
-// will replace itineraries.
-
// Swift machine model for scheduling and other instruction cost heuristics.
def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MicroOpBufferSize = 45; // Based on NEON renamed registers.
let LoadLatency = 3;
let MispredictPenalty = 14; // A branch direction mispredict.
-
- let Itineraries = SwiftItineraries;
+ let CompleteModel = 0; // FIXME: Remove if all instructions are covered.
}
// Swift predicates.
@@ -1558,6 +521,13 @@ let SchedModel = SwiftModel in {
(instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
"PUSH", "tPUSH")>;
+ // LDRLIT pseudo instructions, they expand to LDR + PICADD
+ def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU],
+ (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
+ // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2ThreeCycle],
+ (instregex "LDRLIT_ga_pcrel_ldr")>;
+
// 4.2.26 Branch
def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 6cafbbb..6fded9c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -160,41 +160,39 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned VTSize = 4;
unsigned i = 0;
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
- const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
+ const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
SDValue TFOps[6];
SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
- // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
- // same number of stores. The loads and stores will get combined into
- // ldm/stm later on.
- while (EmittedNumMemOps < NumMemOps) {
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, dl, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- SrcOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(TFOps, i));
+ // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
+ // VLDM/VSTM and make this code emit it when appropriate. This would reduce
+ // pressure on the general-purpose registers. However, this seems harder to map
+ // onto the register allocator's view of the world.
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, dl, MVT::i32)),
- DstPtrInfo.getWithOffset(DstOff),
- isVolatile, false, 0);
- DstOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(TFOps, i));
+ // The number of MEMCPY pseudo-instructions to emit. We use up to
+ // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
+ // later on. This is a lower bound on the number of MEMCPY operations we must
+ // emit.
+ unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
+
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
+
+ for (unsigned I = 0; I != NumMEMCPYs; ++I) {
+ // Evenly distribute registers among MEMCPY operations to reduce register
+ // pressure.
+ unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
+ unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
+
+ Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
+ DAG.getConstant(NumRegs, dl, MVT::i32));
+ Src = Dst.getValue(1);
+ Chain = Dst.getValue(2);
+
+ DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
+ SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
- EmittedNumMemOps += i;
+ EmittedNumMemOps = NextEmittedNumMemOps;
}
if (BytesLeft == 0)
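The even-distribution arithmetic above is easy to check in isolation. A minimal standalone sketch with assumed example values (this is not LLVM code, just the same formula):

    #include <cstdio>

    int main() {
      const unsigned MaxLoadsInLDM = 6; // non-Thumb1 limit, as in the diff
      unsigned NumMemOps = 14;          // assumed: 14 word-sized copies
      // Ceiling division: the minimum number of MEMCPY pseudos needed.
      unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
      unsigned Emitted = 0;
      for (unsigned I = 0; I != NumMEMCPYs; ++I) {
        unsigned Next = NumMemOps * (I + 1) / NumMEMCPYs;
        std::printf("MEMCPY %u uses %u registers\n", I, Next - Emitted);
        Emitted = Next;
      }
      // Prints 4, 5, 5: the copies are spread evenly rather than 6, 6, 2,
      // which keeps peak register pressure lower.
      return 0;
    }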
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 002c3e9..bb6ae28 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -40,37 +41,9 @@ using namespace llvm;
#include "ARMGenSubtargetInfo.inc"
static cl::opt<bool>
-ReserveR9("arm-reserve-r9", cl::Hidden,
- cl::desc("Reserve R9, making it unavailable as GPR"));
-
-static cl::opt<bool>
-ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
-namespace {
-enum AlignMode {
- DefaultAlign,
- StrictAlign,
- NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
- cl::Hidden, cl::init(DefaultAlign),
- cl::values(
- clEnumValN(DefaultAlign, "arm-default-align",
- "Generate unaligned accesses only on hardware/OS "
- "combinations that are known to support them"),
- clEnumValN(StrictAlign, "arm-strict-align",
- "Disallow all unaligned memory accesses"),
- clEnumValN(NoStrictAlign, "arm-no-strict-align",
- "Allow unaligned memory accesses"),
- clEnumValEnd));
-
enum ITMode {
DefaultIT,
RestrictedIT,
@@ -88,6 +61,12 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
+/// ForceFastISel - Use fast-isel, even for subtargets where it is not
+/// currently supported (for testing only).
+static cl::opt<bool>
+ForceFastISel("arm-force-fast-isel",
+ cl::init(false), cl::Hidden);
+
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -110,8 +89,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
+ ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
+ IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -133,6 +112,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV7Ops = false;
HasV8Ops = false;
HasV8_1aOps = false;
+ HasV8_2aOps = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
@@ -147,10 +127,11 @@ void ARMSubtarget::initializeEnvironment() {
UseSoftFloat = false;
HasThumb2 = false;
NoARM = false;
- IsR9Reserved = ReserveR9;
- UseMovt = false;
+ ReserveR9 = false;
+ NoMovt = false;
SupportsTailCall = false;
HasFP16 = false;
+ HasFullFP16 = false;
HasD16 = false;
HasHardwareDivide = false;
HasHardwareDivideInARM = false;
@@ -168,20 +149,36 @@ void ARMSubtarget::initializeEnvironment() {
HasCrypto = false;
HasCRC = false;
HasZeroCycleZeroing = false;
- AllowsUnalignedMem = false;
- Thumb2DSP = false;
+ StrictAlign = false;
+ HasDSP = false;
UseNaClTrap = false;
GenLongCalls = false;
UnsafeFPMath = false;
+
+ // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
+ // directly from it, but we can try to make sure they're consistent when both
+ // are available.
+ UseSjLjEH = isTargetDarwin() && !isTargetWatchOS();
+ assert((!TM.getMCAsmInfo() ||
+ (TM.getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::SjLj) == UseSjLjEH) &&
+ "inconsistent sjlj choice between CodeGen and MC");
}
void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty()) {
- if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
- // Default to the Swift CPU when targeting armv7s/thumbv7s.
- CPUString = "swift";
- else
- CPUString = "generic";
+ CPUString = "generic";
+
+ if (isTargetDarwin()) {
+ StringRef ArchName = TargetTriple.getArchName();
+ if (ArchName.endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else if (ArchName.endswith("v7k"))
+ // Default to the Cortex-A7 CPU when targeting armv7k/thumbv7k.
+ // ARMv7k does not use SjLj exception handling.
+ CPUString = "cortex-a7";
+ }
}
// Insert the architecture feature derived from the target triple into the
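For reference, the Darwin defaulting rule just introduced can be restated as a tiny helper (illustrative only; endsWith and the function name are assumptions, not LLVM's API):

    #include <string>

    static bool endsWith(const std::string &S, const std::string &Suf) {
      return S.size() >= Suf.size() &&
             S.compare(S.size() - Suf.size(), Suf.size(), Suf) == 0;
    }

    std::string defaultDarwinCPU(const std::string &Arch) {
      if (endsWith(Arch, "v7s")) return "swift";     // armv7s/thumbv7s
      if (endsWith(Arch, "v7k")) return "cortex-a7"; // armv7k/thumbv7k
      return "generic";
    }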
@@ -212,44 +209,31 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isAAPCS_ABI())
stackAlignment = 8;
- if (isTargetNaCl())
+ if (isTargetNaCl() || isAAPCS16_ABI())
stackAlignment = 16;
- UseMovt = hasV6T2Ops() && ArmUseMOVT;
-
- if (isTargetMachO()) {
- IsR9Reserved = ReserveR9 || !HasV6Ops;
- SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0);
- } else {
- IsR9Reserved = ReserveR9;
- SupportsTailCall = !isThumb1Only();
- }
-
- if (Align == DefaultAlign) {
- // Assume pre-ARMv6 doesn't support unaligned accesses.
- //
- // ARMv6 may or may not support unaligned accesses depending on the
- // SCTLR.U bit, which is architecture-specific. We assume ARMv6
- // Darwin and NetBSD targets support unaligned accesses, and others don't.
- //
- // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
- // which raises an alignment fault on unaligned accesses. Linux
- // defaults this bit to 0 and handles it as a system-wide (not
- // per-process) setting. It is therefore safe to assume that ARMv7+
- // Linux targets support unaligned accesses. The same goes for NaCl.
- //
- // The above behavior is consistent with GCC.
- AllowsUnalignedMem =
- (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
- isTargetNetBSD())) ||
- (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
- } else {
- AllowsUnalignedMem = !(Align == StrictAlign);
- }
-
- // No v6M core supports unaligned memory access (v6M ARM ARM A3.2)
- if (isV6M())
- AllowsUnalignedMem = false;
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently handle this
+ // case.
+
+ SupportsTailCall = !isThumb1Only();
+
+ if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
+ SupportsTailCall = false;
switch (IT) {
case DefaultIT:
@@ -276,9 +260,15 @@ bool ARMSubtarget::isAPCS_ABI() const {
}
bool ARMSubtarget::isAAPCS_ABI() const {
assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
- return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
+ TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+bool ARMSubtarget::isAAPCS16_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
bool
ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
@@ -321,11 +311,23 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
}
bool ARMSubtarget::hasSinCos() const {
- return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
+ return isTargetWatchOS() ||
+ (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0));
+}
+
+bool ARMSubtarget::enableMachineScheduler() const {
+ // Enable the MachineScheduler before register allocation for out-of-order
+ // architectures where we do not use the PostRA scheduler anymore (for now
+ // restricted to swift).
+ return getSchedModel().isOutOfOrder() && isSwift();
}
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
+ // No need for PostRA scheduling on out of order CPUs (for now restricted to
+ // swift).
+ if (getSchedModel().isOutOfOrder() && isSwift())
+ return false;
return (!isThumb() || hasThumb2());
}
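Taken together, the two hooks implement one policy: an out-of-order Swift core schedules before register allocation and skips PostRA scheduling, while everything else keeps the old behaviour. A hedged restatement (plain struct, not the real subtarget class):

    struct SchedPolicy {
      bool OutOfOrder, IsSwift, IsThumb, HasThumb2;
      bool enableMachineScheduler() const { return OutOfOrder && IsSwift; }
      bool enablePostRAScheduler() const {
        if (OutOfOrder && IsSwift)
          return false;            // pre-RA MachineScheduler already ran
        return !IsThumb || HasThumb2;
      }
    };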
@@ -333,15 +335,30 @@ bool ARMSubtarget::enableAtomicExpand() const {
return hasAnyDataBarrier() && !isThumb1Only();
}
+bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+ // For general targets, the prologue can grow when VFPs are allocated with
+ // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
+ // format that is more important to get right.
+ return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize());
+}
+
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
// range otherwise.
- return UseMovt && (isTargetWindows() ||
- !MF.getFunction()->hasFnAttribute(Attribute::MinSize));
+ return !NoMovt && hasV6T2Ops() &&
+ (isTargetWindows() || !MF.getFunction()->optForMinSize());
}
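The rewritten predicate folds the old cl::opt and the v6t2 check into the new feature bit; as a free function (names assumed for illustration, not the real signature):

    bool useMovt(bool NoMovt, bool HasV6T2Ops, bool IsWindows, bool MinSize) {
      // MOVW/MOVT need v6t2; Windows requires them for position independence.
      return !NoMovt && HasV6T2Ops && (IsWindows || !MinSize);
    }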
bool ARMSubtarget::useFastISel() const {
+ // Enable fast-isel for any target, for testing only.
+ if (ForceFastISel)
+ return true;
+
+ // Limit fast-isel to the targets that are or have been tested.
+ if (!hasV6Ops())
+ return false;
+
// Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
return TM.Options.EnableFastISel &&
((isTargetMachO() && !isThumb1Only()) ||
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index dd101df..a8b2801 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -43,11 +43,17 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait,
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
+ CortexA57, CortexA72, Krait, Swift
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
};
+ enum ARMArchEnum {
+ ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
+ ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a
+ };
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
@@ -55,6 +61,9 @@ protected:
/// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
ARMProcClassEnum ARMProcClass;
+ /// ARMArch - ARM architecture
+ ARMArchEnum ARMArch;
+
/// HasV4TOps, HasV5TOps, HasV5TEOps,
/// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
@@ -68,6 +77,7 @@ protected:
bool HasV7Ops;
bool HasV8Ops;
bool HasV8_1aOps;
+ bool HasV8_2aOps;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -109,22 +119,24 @@ protected:
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
- /// IsR9Reserved - True if R9 is a not available as general purpose register.
- bool IsR9Reserved;
+ /// ReserveR9 - True if R9 is not available as a general purpose register.
+ bool ReserveR9;
- /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
- /// imms (including global addresses).
- bool UseMovt;
+ /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
+ /// 32-bit imms (including global addresses).
+ bool NoMovt;
/// SupportsTailCall - True if the OS supports tail call. The dynamic linker
/// must be able to synthesize call stubs for interworking between ARM and
/// Thumb.
bool SupportsTailCall;
- /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
- /// only so far)
+ /// HasFP16 - True if subtarget supports half-precision FP conversions
bool HasFP16;
+ /// HasFullFP16 - True if subtarget supports half-precision FP operations
+ bool HasFullFP16;
+
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
bool HasD16;
@@ -190,18 +202,18 @@ protected:
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
- /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// StrictAlign - If true, the subtarget disallows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
- bool AllowsUnalignedMem;
+ bool StrictAlign;
/// RestrictIT - If true, the subtarget disallows generation of deprecated IT
/// blocks to conform to ARMv8 rule.
bool RestrictIT;
- /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
- /// and such) instructions in Thumb2 code.
- bool Thumb2DSP;
+ /// HasDSP - If true, the subtarget supports the DSP (saturating arith
+ /// and such) instructions.
+ bool HasDSP;
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
@@ -212,6 +224,9 @@ protected:
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath;
+ /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
+ bool UseSjLjEH;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -297,6 +312,7 @@ public:
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
bool hasV8_1aOps() const { return HasV8_1aOps; }
+ bool hasV8_2aOps() const { return HasV8_2aOps; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
@@ -343,17 +359,20 @@ public:
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
- bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
+ bool useSjLjEH() const { return UseSjLjEH; }
bool genLongCalls() const { return GenLongCalls; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
+ bool hasFullFP16() const { return HasFullFP16; }
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
+ bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -375,6 +394,11 @@ public:
TargetTriple.getEnvironment() == Triple::EABIHF) &&
!isTargetDarwin() && !isTargetWindows();
}
+ bool isTargetGNUAEABI() const {
+ return (TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
// ARM Targets that support EHABI exception handling standard
// Darwin uses SjLj. Other targets might need more checks.
@@ -383,7 +407,7 @@ public:
TargetTriple.getEnvironment() == Triple::GNUEABI ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
- TargetTriple.getEnvironment() == Triple::Android) &&
+ isTargetAndroid()) &&
!isTargetDarwin() && !isTargetWindows();
}
@@ -391,14 +415,13 @@ public:
// FIXME: this is invalid for WindowsCE
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
- isTargetWindows();
- }
- bool isTargetAndroid() const {
- return TargetTriple.getEnvironment() == Triple::Android;
+ isTargetWindows() || isAAPCS16_ABI();
}
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
+ bool isAAPCS16_ABI() const;
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
@@ -409,17 +432,17 @@ public:
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
- bool isV6M() const {
- return isThumb1Only() && isMClass();
+ bool isR9Reserved() const {
+ return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
- bool isR9Reserved() const { return IsR9Reserved; }
+ bool useStride4VFPs(const MachineFunction &MF) const;
bool useMovt(const MachineFunction &MF) const;
bool supportsTailCall() const { return SupportsTailCall; }
- bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+ bool allowsUnalignedMem() const { return !StrictAlign; }
bool restrictIT() const { return RestrictIT; }
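isR9Reserved() now derives the MachO special case on the fly instead of caching it at init time; a hedged mirror of the rule (standalone function, not the real class):

    bool isR9Reserved(bool IsMachO, bool ReserveR9, bool HasV6Ops) {
      // On MachO, R9 is additionally reserved whenever the core lacks v6 ops.
      return IsMachO ? (ReserveR9 || !HasV6Ops) : ReserveR9;
    }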
@@ -433,6 +456,9 @@ public:
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// Returns true if machine scheduler should be enabled.
+ bool enableMachineScheduler() const override;
+
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 93495d6..fca1901 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName().startswith("aapcs"))
+ if (Options.MCOptions.getABIName() == "aapcs16")
+ return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+ else if (Options.MCOptions.getABIName().startswith("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
else if (Options.MCOptions.getABIName().startswith("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
CPU.startswith("cortex-m")) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else if (TT.isWatchOS()) {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
} else {
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
}
@@ -106,7 +110,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
if (TT.isOSNetBSD())
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
else
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
}
}
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// to 64. We always try to give them natural alignment.
if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
Ret += "-v64:32:64-v128:32:128";
- else
+ else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
Ret += "-v128:64:128";
// Try to align aggregates to 32 bits (the default is 64 bits, which has no
@@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (TT.isOSNaCl())
+ if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
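The stack-alignment suffix selection above, restated as a sketch (the enum and helper are illustrative, not LLVM's API):

    #include <string>

    enum class ABI { APCS, AAPCS, AAPCS16 };

    std::string stackSuffix(bool IsNaCl, ABI TargetABI) {
      if (IsNaCl || TargetABI == ABI::AAPCS16)
        return "-S128"; // 128-bit aligned stack
      if (TargetABI == ABI::AAPCS)
        return "-S64";
      return "-S32";
    }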
@@ -184,6 +188,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType =
Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+
+ // Default to triple-appropriate EABI
+ if (Options.EABIVersion == EABI::Default ||
+ Options.EABIVersion == EABI::Unknown) {
+ if (Subtarget.isTargetGNUAEABI())
+ this->Options.EABIVersion = EABI::GNU;
+ else
+ this->Options.EABIVersion = EABI::EABI5;
+ }
}
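The EABI defaulting added to the constructor reduces to a small decision function (a sketch under assumed enum names mirroring the diff):

    enum class EABI { Unknown, Default, EABI5, GNU };

    EABI resolveEABI(EABI Requested, bool IsGNUAEABI) {
      if (Requested != EABI::Default && Requested != EABI::Unknown)
        return Requested;                  // an explicit choice wins
      return IsGNUAEABI ? EABI::GNU : EABI::EABI5;
    }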
ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
@@ -225,12 +238,12 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(ARMTTIImpl(this, F));
+ });
}
-
-void ARMTargetMachine::anchor() { }
+void ARMTargetMachine::anchor() {}
ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -244,7 +257,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
"support ARM mode execution!");
}
-void ARMLETargetMachine::anchor() { }
+void ARMLETargetMachine::anchor() {}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -253,7 +266,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void ARMBETargetMachine::anchor() { }
+void ARMBETargetMachine::anchor() {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -262,7 +275,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void ThumbTargetMachine::anchor() { }
+void ThumbTargetMachine::anchor() {}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -273,7 +286,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-void ThumbLETargetMachine::anchor() { }
+void ThumbLETargetMachine::anchor() {}
ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -282,7 +295,7 @@ ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void ThumbBETargetMachine::anchor() { }
+void ThumbBETargetMachine::anchor() {}
ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -348,7 +361,13 @@ bool ARMPassConfig::addPreISel() {
// tricky when doing code gen per function.
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
(EnableGlobalMerge == cl::BOU_UNSET);
- addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
+ // Merging of extern globals is enabled by default on non-Mach-O as we
+ // expect it to be generally either beneficial or harmless. On Mach-O it
+ // is disabled as we emit the .subsections_via_symbols directive which
+ // means that merging extern globals is not safe.
+ bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
+ addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
+ MergeExternalByDefault));
}
return false;
@@ -356,9 +375,6 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
-
- if (TM->getTargetTriple().isOSBinFormatELF() && TM->Options.EnableFastISel)
- addPass(createARMGlobalBaseRegPass());
return false;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index 8c98e08..8ad1f3d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -26,7 +26,8 @@ public:
enum ARMABI {
ARM_ABI_UNKNOWN,
ARM_ABI_APCS,
- ARM_ABI_AAPCS // ARM EABI
+ ARM_ABI_AAPCS, // ARM EABI
+ ARM_ABI_AAPCS16
} TargetABI;
protected:
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2f194cf..c152011 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
-unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -47,12 +47,12 @@ unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 3;
}
-unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
// Single to/from double precision conversions.
- static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
+ static const CostTblEntry NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ ISD::FP_ROUND, MVT::v2f64, 2 },
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
@@ -61,10 +61,9 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
- if (Idx != -1)
- return LT.first * NEONFltDblTbl[Idx].Cost;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
EVT SrcTy = TLI->getValueType(DL, Src);
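The recurring change in this file replaces index-returning table lookups with pointer-returning ones, so callers can test and dereference in one step. A minimal sketch of the idiom (simplified types; not LLVM's actual helper):

    #include <cstddef>

    struct CostTblEntry { int ISD; int Type; unsigned Cost; };

    template <size_t N>
    const CostTblEntry *CostTableLookup(const CostTblEntry (&Tbl)[N],
                                        int ISD, int Ty) {
      for (const CostTblEntry &Entry : Tbl)
        if (Entry.ISD == ISD && Entry.Type == Ty)
          return &Entry; // caller: if (const auto *E = ...) use E->Cost
      return nullptr;    // no match: fall back to the base implementation
    }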
@@ -76,8 +75,7 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -153,15 +151,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -184,15 +181,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONFloatConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -216,15 +212,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONIntegerConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -236,17 +231,17 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isInteger()) {
- int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return ARMIntegerConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) {
+int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
// Penalize inserting into a D-subregister. We end up with three times
// lower estimated throughput on Swift.
if (ST->isSwift() &&
@@ -255,28 +250,30 @@ unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getScalarSizeInBits() <= 32)
return 3;
- // Cross-class copies are expensive on many microarchitectures,
- // so assume they are expensive by default.
if ((Opcode == Instruction::InsertElement ||
- Opcode == Instruction::ExtractElement) &&
- ValTy->getVectorElementType()->isIntegerTy())
- return 3;
+ Opcode == Instruction::ExtractElement)) {
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if (ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
+ // Even if it's not a cross class copy, this likely leads to mixing
+ // of NEON and VFP code and should therefore be penalized.
+ if (ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
+ }
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON, a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
// Lowering of some vector selects is currently far from perfect.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONVectorSelectTbl[] = {
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
@@ -285,21 +282,20 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
- SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorSelectTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT()))
+ return Entry->Cost;
}
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -314,7 +310,7 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}
-unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+int ARMTTIImpl::getFPOpCost(Type *Ty) {
// Use similar logic that's in ARMISelLowering:
// Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
// to VFP.
@@ -333,14 +329,14 @@ unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
return TargetTransformInfo::TCC_Expensive;
}
-unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+ static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
@@ -353,16 +349,16 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
- return LT.first * NEONShuffleTbl[Idx].Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
if (Kind == TTI::SK_Alternate) {
- static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+ static const CostTblEntry NEONAltShuffleTbl[] = {
// Alt shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
@@ -379,27 +375,26 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Idx =
- CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- return LT.first * NEONAltShuffleTbl[Idx].Cost;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned ARMTTIImpl::getArithmeticInstrCost(
+int ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
+ static const CostTblEntry CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (added function call) is going to be very expensive.
@@ -440,16 +435,12 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
// Multiplication.
};
- int Idx = -1;
-
if (ST->hasNEON())
- Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
-
- if (Idx != -1)
- return LT.first * CostTbl[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
+ return LT.first * Entry->Cost;
- unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
@@ -465,10 +456,9 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
return Cost;
}
-unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
@@ -479,21 +469,21 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN don't support vector types with i64/f64 elements.
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+ bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 84f256f..7d8d238 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -41,7 +41,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *getTLI() const { return TLI; }
public:
- explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
+ explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -52,11 +52,13 @@ public:
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
+ bool enableInterleavedAccessVectorization() { return true; }
+
/// \name Scalar TTI Implementations
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
/// @}
@@ -92,34 +94,31 @@ public:
return 1;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, bool IsComplex);
- unsigned getFPOpCost(Type *Ty);
+ int getFPOpCost(Type *Ty);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index cf6b892..c69a741 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -129,7 +129,6 @@ public:
};
class ARMAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
UnwindContext UC;
@@ -247,48 +246,49 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandVector &Operands);
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[ARM::ModeThumb];
+ return getSTI().getFeatureBits()[ARM::ModeThumb];
}
bool isThumbOne() const {
- return isThumb() && !STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2];
}
bool isThumbTwo() const {
- return isThumb() && STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2];
}
bool hasThumb() const {
- return STI.getFeatureBits()[ARM::HasV4TOps];
+ return getSTI().getFeatureBits()[ARM::HasV4TOps];
}
bool hasV6Ops() const {
- return STI.getFeatureBits()[ARM::HasV6Ops];
+ return getSTI().getFeatureBits()[ARM::HasV6Ops];
}
bool hasV6MOps() const {
- return STI.getFeatureBits()[ARM::HasV6MOps];
+ return getSTI().getFeatureBits()[ARM::HasV6MOps];
}
bool hasV7Ops() const {
- return STI.getFeatureBits()[ARM::HasV7Ops];
+ return getSTI().getFeatureBits()[ARM::HasV7Ops];
}
bool hasV8Ops() const {
- return STI.getFeatureBits()[ARM::HasV8Ops];
+ return getSTI().getFeatureBits()[ARM::HasV8Ops];
}
bool hasARM() const {
- return !STI.getFeatureBits()[ARM::FeatureNoARM];
+ return !getSTI().getFeatureBits()[ARM::FeatureNoARM];
}
- bool hasThumb2DSP() const {
- return STI.getFeatureBits()[ARM::FeatureDSPThumb2];
+ bool hasDSP() const {
+ return getSTI().getFeatureBits()[ARM::FeatureDSP];
}
bool hasD16() const {
- return STI.getFeatureBits()[ARM::FeatureD16];
+ return getSTI().getFeatureBits()[ARM::FeatureD16];
}
bool hasV8_1aOps() const {
- return STI.getFeatureBits()[ARM::HasV8_1aOps];
+ return getSTI().getFeatureBits()[ARM::HasV8_1aOps];
}
void SwitchMode() {
+ MCSubtargetInfo &STI = copySTI();
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
bool isMClass() const {
- return STI.getFeatureBits()[ARM::FeatureMClass];
+ return getSTI().getFeatureBits()[ARM::FeatureMClass];
}
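SwitchMode() illustrates the new copy-on-write subtarget discipline: the parser reads through a shared const MCSubtargetInfo and only clones it when a directive mutates features. A minimal sketch of that pattern (simplified types and assumed names, not LLVM's classes):

    #include <memory>

    struct STI { unsigned long long FeatureBits = 0; };

    class Parser {
      const STI *Shared;         // owned elsewhere, never mutated
      std::unique_ptr<STI> Copy; // lazily created private copy
    public:
      explicit Parser(const STI &S) : Shared(&S) {}
      const STI &getSTI() const { return Copy ? *Copy : *Shared; }
      STI &copySTI() {           // clone on first mutation
        if (!Copy)
          Copy = std::make_unique<STI>(*Shared);
        return *Copy;
      }
    };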
/// @name Auto-generated Match Functions
@@ -343,14 +343,15 @@ public:
Match_RequiresNotITBlock,
Match_RequiresV6,
Match_RequiresThumb2,
+ Match_RequiresV8,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "ARMGenAsmMatcher.inc"
};
- ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : STI(STI), MII(MII), UC(Parser) {
+ : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) {
MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
@@ -564,87 +565,6 @@ class ARMOperand : public MCParsedAsmOperand {
public:
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
- ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
- Kind = o.Kind;
- StartLoc = o.StartLoc;
- EndLoc = o.EndLoc;
- switch (Kind) {
- case k_CondCode:
- CC = o.CC;
- break;
- case k_ITCondMask:
- ITMask = o.ITMask;
- break;
- case k_Token:
- Tok = o.Tok;
- break;
- case k_CCOut:
- case k_Register:
- Reg = o.Reg;
- break;
- case k_RegisterList:
- case k_DPRRegisterList:
- case k_SPRRegisterList:
- Registers = o.Registers;
- break;
- case k_VectorList:
- case k_VectorListAllLanes:
- case k_VectorListIndexed:
- VectorList = o.VectorList;
- break;
- case k_CoprocNum:
- case k_CoprocReg:
- Cop = o.Cop;
- break;
- case k_CoprocOption:
- CoprocOption = o.CoprocOption;
- break;
- case k_Immediate:
- Imm = o.Imm;
- break;
- case k_MemBarrierOpt:
- MBOpt = o.MBOpt;
- break;
- case k_InstSyncBarrierOpt:
- ISBOpt = o.ISBOpt;
- case k_Memory:
- Memory = o.Memory;
- break;
- case k_PostIndexRegister:
- PostIdxReg = o.PostIdxReg;
- break;
- case k_MSRMask:
- MMask = o.MMask;
- break;
- case k_BankedReg:
- BankedReg = o.BankedReg;
- break;
- case k_ProcIFlags:
- IFlags = o.IFlags;
- break;
- case k_ShifterImmediate:
- ShifterImm = o.ShifterImm;
- break;
- case k_ShiftedRegister:
- RegShiftedReg = o.RegShiftedReg;
- break;
- case k_ShiftedImmediate:
- RegShiftedImm = o.RegShiftedImm;
- break;
- case k_RotateImmediate:
- RotImm = o.RotImm;
- break;
- case k_ModifiedImmediate:
- ModImm = o.ModImm;
- break;
- case k_BitfieldDescriptor:
- Bitfield = o.Bitfield;
- break;
- case k_VectorIndex:
- VectorIndex = o.VectorIndex;
- break;
- }
- }
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override { return StartLoc; }
@@ -4054,7 +3974,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
- if (!hasThumb2DSP() && (FlagsVal & 0x400))
+ if (!hasDSP() && (FlagsVal & 0x400))
// The _g and _nzcvqg versions are only valid if the DSP extension is
// available.
return MatchOperand_NoMatch;
@@ -5202,6 +5122,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// FALLTHROUGH
}
case AsmToken::Colon: {
+ S = Parser.getTok().getLoc();
// ":lower16:" and ":upper16:" expression prefixes
// FIXME: Check it's an expression prefix,
// e.g. (FOO - :lower16:BAR) isn't legal.
@@ -5220,8 +5141,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return false;
}
case AsmToken::Equal: {
+ S = Parser.getTok().getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
- return Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return Error(S, "unexpected token in operand");
Parser.Lex(); // Eat '='
const MCExpr *SubExprVal;
@@ -5229,7 +5151,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ const MCExpr *CPLoc =
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, S);
Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E));
return false;
}
@@ -5682,9 +5605,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
// VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+ (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
+ (static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f16"))
RegIdx = 4;
if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
@@ -8610,18 +8535,29 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
inITBlock())
return Match_RequiresNotITBlock;
+ } else if (isThumbOne()) {
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ if (Opc == ARM::tADDhirr && !hasV6MOps() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
}
- // Some high-register supporting Thumb1 encodings only allow both registers
- // to be from r0-r7 when in Thumb2.
- else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() &&
- isARMLowRegister(Inst.getOperand(1).getReg()) &&
- isARMLowRegister(Inst.getOperand(2).getReg()))
- return Match_RequiresThumb2;
- // Others only require ARMv6 or later.
- else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
- isARMLowRegister(Inst.getOperand(0).getReg()) &&
- isARMLowRegister(Inst.getOperand(1).getReg()))
- return Match_RequiresV6;
+
+ for (unsigned I = 0; I < MCID.NumOperands; ++I)
+ if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
+ // rGPRRegClass excludes PC and, before ARMv8, also excluded SP
+ if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops())
+ return Match_RequiresV8;
+ else if (Inst.getOperand(I).getReg() == ARM::PC)
+ return Match_InvalidOperand;
+ }
+
return Match_Success;
}
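The new operand scan boils down to two checks per rGPR slot; a hedged mirror (the enum values reuse the diff's names for readability, the function itself is illustrative):

    enum MatchResult { Match_Success, Match_RequiresV8, Match_InvalidOperand };

    MatchResult checkRGPROperand(unsigned Reg, unsigned SP, unsigned PC,
                                 bool HasV8Ops) {
      if (Reg == SP && !HasV8Ops)
        return Match_RequiresV8;     // SP only joined rGPR in ARMv8
      if (Reg == PC)
        return Match_InvalidOperand; // PC is never in rGPR
      return Match_Success;
    }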
@@ -8680,7 +8616,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
@@ -8720,6 +8656,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_RequiresV8:
+ return Error(IDLoc, "instruction variant requires ARMv8 or later");
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
@@ -8868,7 +8806,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
return false;
}
- getParser().getStreamer().EmitValue(Value, Size);
+ getParser().getStreamer().EmitValue(Value, Size, L);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -9098,7 +9036,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARMTargetParser::parseArch(Arch);
+ unsigned ID = ARM::parseArch(Arch);
if (ID == ARM::AK_INVALID) {
Error(L, "Unknown arch name");
@@ -9106,7 +9044,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
}
Triple T;
- STI.setDefaultFeatures(T.getARMCPUForArch(Arch));
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str());
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
getTargetStreamer().emitArch(ID);
@@ -9233,12 +9172,13 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is using table-gen data, but should be moved to
// ARMTargetParser once that is table-gen'd.
- if (!STI.isCPUStringValid(CPU)) {
+ if (!getSTI().isCPUStringValid(CPU)) {
Error(L, "Unknown CPU name");
return false;
}
- STI.setDefaultFeatures(CPU);
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures(CPU, "");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
@@ -9249,13 +9189,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
SMLoc FPUNameLoc = getTok().getLoc();
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARMTargetParser::parseFPU(FPU);
+ unsigned ID = ARM::parseFPU(FPU);
std::vector<const char *> Features;
- if (!ARMTargetParser::getFPUFeatures(ID, Features)) {
+ if (!ARM::getFPUFeatures(ID, Features)) {
Error(FPUNameLoc, "Unknown FPU name");
return false;
}
+ MCSubtargetInfo &STI = copySTI();
for (auto Feature : Features)
STI.ApplyFeatureFlag(Feature);
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
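
The .arch, .cpu and .fpu hunks above all switch to the same discipline: instead of mutating the shared STI member, the parser takes a private copy via copySTI(), reapplies the requested configuration to the copy, and recomputes the matcher's available-feature mask. A minimal sketch of that shape, assuming the copySTI()/setAvailableFeatures() MCTargetAsmParser API and a hypothetical handler name:

    // Sketch only: the common body shared by the three directive handlers.
    bool ARMAsmParser::parseDirectiveExample(StringRef CPU, StringRef FS) {
      MCSubtargetInfo &STI = copySTI();  // writable, parser-local copy
      STI.setDefaultFeatures(CPU, FS);   // reset to the requested config
      setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
      return false;                      // false == no parse error
    }
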
@@ -9895,7 +9836,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
SMLoc ArchLoc = Parser.getTok().getLoc();
getLexer().Lex();
- unsigned ID = ARMTargetParser::parseArch(Arch);
+ unsigned ID = ARM::parseArch(Arch);
if (ID == ARM::AK_INVALID) {
Error(ArchLoc, "unknown architecture '" + Arch + "'");
@@ -9976,22 +9917,22 @@ extern "C" void LLVMInitializeARMAsmParser() {
// when we start to table-generate them, and we can use the ARM
// flags below, that were generated by table-gen.
static const struct {
- const ARM::ArchExtKind Kind;
- const unsigned ArchCheck;
+ const unsigned Kind;
+ const uint64_t ArchCheck;
const FeatureBitset Features;
} Extensions[] = {
{ ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass,
+ { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
{ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- // FIXME: Also available in ARMv6-K
- { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} },
+ { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
// FIXME: Only available in A-class, isel not predicated
{ ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
+ { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
// FIXME: Unsupported extensions.
{ ARM::AEK_OS, Feature_None, {} },
{ ARM::AEK_IWMMXT, Feature_None, {} },
@@ -10020,7 +9961,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
EnableFeature = false;
Name = Name.substr(2);
}
- unsigned FeatureKind = ARMTargetParser::parseArchExt(Name);
+ unsigned FeatureKind = ARM::parseArchExt(Name);
if (FeatureKind == ARM::AEK_INVALID)
Error(ExtLoc, "unknown architectural extension: " + Name);
@@ -10037,6 +9978,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
return false;
}
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset ToggleFeatures = EnableFeature
? (~STI.getFeatureBits() & Extension.Features)
: ( STI.getFeatureBits() & Extension.Features);
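
.arch_extension follows the same copy-then-toggle discipline: once the table row is found, its ArchCheck bits must all be present in the current feature set before the extension's feature bits are flipped. A standalone model of that gate (the table change above means, e.g., that sec now passes it from ARMv6K upward instead of requiring ARMv7):

    #include <cstdint>
    // Model: an extension is permitted only when every architecture bit
    // it requires is already available.
    static bool extensionPermitted(uint64_t Available, uint64_t ArchCheck) {
      return (Available & ArchCheck) == ArchCheck;
    }
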
@@ -10078,6 +10020,10 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
"expression value must be representable in 32 bits");
}
break;
+ case MCK_rGPR:
+ if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP)
+ return Match_Success;
+ break;
case MCK_GPRPair:
if (Op.isReg() &&
MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 097ec04..e63defe 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -59,7 +59,7 @@ namespace {
}
// Called when decoding an IT instruction. Sets the IT state for the following
- // instructions that for the IT block. Firstcond and Mask correspond to the
+ // instructions that form the IT block.  Firstcond and Mask correspond to the
// fields in the IT instruction encoding.
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
@@ -459,21 +459,18 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
- MI.clear();
Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -485,7 +482,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
@@ -497,7 +493,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -509,7 +504,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -517,7 +511,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -525,7 +518,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
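
All of the MI.clear() calls between decode attempts are dropped, which is only safe if decodeInstruction() leaves MI reusable after a failed attempt; presumably the table-generated decoder of this revision now starts from a clean MCInst itself. A sketch of the resulting loop shape (table names as in the hunks; the array-of-tables packaging is illustrative):

    // Sketch: try each decoder table in turn with no manual MI.clear().
    for (const uint8_t *Table : {DecoderTableVFP32, DecoderTableVFPV832,
                                 DecoderTableNEONData32}) {
      DecodeStatus R = decodeInstruction(Table, MI, Insn, Address, this, STI);
      if (R != MCDisassembler::Fail) {
        Size = 4;
        return R;
      }
    }
    Size = 0;
    return MCDisassembler::Fail;
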
@@ -718,7 +710,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this,
STI);
if (Result) {
@@ -729,7 +720,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -763,7 +753,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn32 =
(Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -774,7 +763,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -784,7 +772,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
- MI.clear();
Result =
decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -794,7 +781,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- MI.clear();
Result =
decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -803,7 +789,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
- MI.clear();
Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this,
STI);
if (Result != MCDisassembler::Fail) {
@@ -814,7 +799,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) {
- MI.clear();
uint32_t NEONLdStInsn = Insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
@@ -828,7 +812,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 24, 4) == 0xF) {
- MI.clear();
uint32_t NEONDataInsn = Insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
@@ -841,7 +824,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
uint32_t NEONCryptoInsn = Insn32;
NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
@@ -853,7 +835,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
uint32_t NEONv8Insn = Insn32;
NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
@@ -864,7 +845,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -902,7 +882,7 @@ static DecodeStatus
DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
-
+
if (RegNo == 15)
S = MCDisassembler::SoftFail;
@@ -986,8 +966,13 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (RegNo == 13 || RegNo == 15)
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if ((RegNo == 13 && !featureBits[ARM::HasV8Ops]) || RegNo == 15)
S = MCDisassembler::SoftFail;
+
Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
return S;
}
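
The decoder reaches the subtarget through the opaque Decoder pointer, so register-class decoding can now depend on feature bits. Reduced to a standalone predicate, the new rGPR rule is:

    // Model: r15 always soft-fails; r13 soft-fails only below ARMv8,
    // where SP was not yet part of rGPR.
    static bool rGPRSoftFails(unsigned RegNo, bool hasV8Ops) {
      return (RegNo == 13 && !hasV8Ops) || RegNo == 15;
    }
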
@@ -1147,7 +1132,7 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
unsigned imm = fieldFromInstruction(Val, 7, 5);
// Register-immediate
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
@@ -1658,7 +1643,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::STRD_POST:
if (P == 0 && W == 1)
S = MCDisassembler::SoftFail;
-
+
if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
S = MCDisassembler::SoftFail;
if (type && Rm == 15)
@@ -4131,7 +4116,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
// indicates the move for the GE{3:0} bits, the mask{0} bit can be set
// only if the processor includes the DSP extension.
if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
- (!(FeatureBits[ARM::FeatureDSPThumb2]) && (Mask & 1)))
+ (!(FeatureBits[ARM::FeatureDSP]) && (Mask & 1)))
S = MCDisassembler::SoftFail;
}
}
@@ -5065,6 +5050,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5075,10 +5064,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
- // VMOVv2f32 is ambiguous with these decodings.
- if (!(imm & 0x38) && cmode == 0xF) {
- if (op == 1) return MCDisassembler::Fail;
- Inst.setOpcode(ARM::VMOVv2f32);
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv2f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv1i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv8i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ }
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
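
When imm{5:3} are zero the encoding cannot carry a VCVT fixed-point fbits field, so it must decode as a NEON modified-immediate VMOV/VMVN instead; the FP16-only rows extend that disambiguation to cmode values 0xC-0xE. A standalone model of the opcode selection for the D-register variant, returning nullptr when the MCInst keeps its current opcode (the Q-register variant below follows the same mapping with the 128-bit opcodes):

    // Model of DecodeVCVTD's remapping for imm{5:3} == 0.
    static const char *remapVCVTD(unsigned cmode, unsigned op,
                                  bool hasFullFP16) {
      if (cmode == 0xF)
        return op ? "UNDEFINED" : "VMOVv2f32";
      if (hasFullFP16 && cmode == 0xE)
        return op ? "VMOVv1i64" : "VMOVv8i8";
      if (hasFullFP16 && (cmode == 0xD || cmode == 0xC))
        return op ? "VMVNv2i32" : "VMOVv2i32";
      return nullptr; // opcode unchanged; still a NEON mod-imm decode
    }
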
@@ -5095,6 +5109,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5105,10 +5123,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
- // VMOVv4f32 is ambiguous with these decodings.
- if (!(imm & 0x38) && cmode == 0xF) {
- if (op == 1) return MCDisassembler::Fail;
- Inst.setOpcode(ARM::VMOVv4f32);
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv4f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv2i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv16i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ }
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
@@ -5132,7 +5175,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
unsigned Rm = fieldFromInstruction(Val, 0, 4);
Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
unsigned Cond = fieldFromInstruction(Val, 28, 4);
-
+
if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
S = MCDisassembler::SoftFail;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 0bff521..33fc85a 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -804,7 +805,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned Opcode = MI->getOpcode();
// For writes, handle extended mask bits if the DSP extension is present.
- if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSPThumb2]) {
+ if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) {
switch (SYSm) {
case 0x400:
O << "apsr_g";
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3927c9f..52f7115 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class ARMInstPrinter : public MCInstPrinter {
public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1114635..fa52c93 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -25,13 +25,17 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -180,9 +184,8 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
-bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
// Relaxing tB to t2B. tB has a signed 12-bit displacement with the
@@ -192,7 +195,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
//
// Relax if the value is too big for a (signed) i8.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 2046 || Offset < -2048;
+ if (Offset > 2046 || Offset < -2048)
+ return "out of range pc-relative fixup value";
+ break;
}
case ARM::fixup_arm_thumb_bcc: {
// Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
@@ -202,23 +207,40 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
//
// Relax if the value is too big for a (signed) i8.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 254 || Offset < -256;
+ if (Offset > 254 || Offset < -256)
+ return "out of range pc-relative fixup value";
+ break;
}
case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp: {
// If the immediate is negative, greater than 1020, or not a multiple
// of four, the wide version of the instruction must be used.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 1020 || Offset < 0 || Offset & 3;
+ if (Offset & 3)
+ return "misaligned pc-relative fixup value";
+ else if (Offset > 1020 || Offset < 0)
+ return "out of range pc-relative fixup value";
+ break;
}
- case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cb: {
// If we have a Thumb CBZ or CBNZ instruction and its target is the next
// instruction, it is actually out of range for the instruction.
// It will be changed to a NOP.
int64_t Offset = (Value & ~1);
- return Offset == 2;
+ if (Offset == 2)
+ return "will be converted to nop";
+ break;
}
- llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+ default:
+ llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
+ }
+ return nullptr;
+}
+
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return reasonForFixupRelaxation(Fixup, Value);
}
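
Factoring the range checks into reasonForFixupRelaxation() lets the same logic serve both the relaxation decision here and, further down, user-facing diagnostics. For the Thumb tB case the math reduces to this standalone model:

    #include <cstdint>
    // Model: tB has a signed 12-bit, halfword-scaled displacement and the
    // Thumb PC reads 4 bytes ahead, so the reachable window is [-2048, 2046].
    static const char *tBRelaxReason(uint64_t Value) {
      int64_t Offset = int64_t(Value) - 4;
      if (Offset > 2046 || Offset < -2048)
        return "out of range pc-relative fixup value";
      return nullptr; // in range: no relaxation to t2B required
    }
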
void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
@@ -317,9 +339,10 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
return Value;
}
-static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx,
- bool IsLittleEndian) {
+unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ bool IsPCRel, MCContext *Ctx,
+ bool IsLittleEndian,
+ bool IsResolved) const {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
@@ -372,8 +395,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
isAdd = false;
}
- if (Ctx && Value >= 4096)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 4096) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10,
@@ -383,8 +408,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
- case ARM::fixup_thumb_adr_pcrel_10:
- return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_adr_pcrel_12: {
// ARM PC-relative values are offset by 8.
Value -= 8;
@@ -393,8 +416,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
opc = 2; // 0b0010
}
- if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
// Encode the immediate and shift the opcode into place.
return ARM_AM::getSOImmVal(Value) | (opc << 21);
}
@@ -517,21 +542,44 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
((uint16_t)imm10LBits) << 1);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
+ case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp:
- // Offset by 4, and don't encode the low two bits. Two bytes of that
- // 'off by 4' is implicitly handled by the half-word ordering of the
- // Thumb encoding, so we only need to adjust by 2 here.
- return ((Value - 2) >> 2) & 0xff;
+ // On CPUs supporting Thumb2, this will be relaxed to an ldr.w; otherwise we
+ // could have an error on our hands.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
+ // Offset by 4, and don't encode the low two bits.
+ return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_thumb_cb: {
// Offset by 4 and don't encode the lower bit, which is always 0.
+ // FIXME: diagnose if no Thumb2
uint32_t Binary = (Value - 4) >> 1;
return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
return ((Value - 4) >> 1) & 0xff;
case ARM::fixup_arm_pcrel_10_unscaled: {
Value = Value - 8; // ARM fixups offset by an additional word and don't
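
On Thumb1-only subtargets those branch fixups can no longer be relaxed away, so adjustFixupValue() now reuses reasonForFixupRelaxation() to report the problem instead of silently truncating. The encodings themselves are simple shift-and-mask affairs; a runnable model of the fixup_arm_thumb_br case:

    #include <cassert>
    #include <cstdint>
    // Model: subtract the 4-byte Thumb PC bias, drop the always-zero low
    // bit, keep 11 bits.
    static uint32_t encodeThumbBr(uint64_t Value) {
      return ((Value - 4) >> 1) & 0x7ff;
    }
    int main() {
      assert(encodeThumbBr(4) == 0);           // branch to the next instruction
      assert(encodeThumbBr(2046 + 4) == 1023); // largest forward displacement
      return 0;
    }
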
@@ -542,8 +590,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- if (Ctx && Value >= 256)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
return Value | (isAdd << 23);
}
@@ -561,8 +611,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- if (Ctx && Value >= 256)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -582,6 +634,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
const MCValue &Target, uint64_t &Value,
bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
// Some fixups to thumb function symbols need the low bit (thumb bit)
// twiddled.
if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
@@ -590,18 +643,21 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
(unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
(unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
- if (A) {
- const MCSymbol &Sym = A->getSymbol();
- if (Asm.isThumbFunc(&Sym))
+ if (Sym) {
+ if (Asm.isThumbFunc(Sym))
Value |= 1;
}
}
- // For Thumb1 BL instruction, it is possible to be a long jump between
- // the basic blocks of the same function. Thus, we would like to resolve
- // the offset when the destination has the same MCFragment.
- if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
- const MCSymbol &Sym = A->getSymbol();
- IsResolved = (Sym.getFragment() == DF);
+ if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ assert(Sym && "How did we resolve this?");
+
+ // If the symbol is external the linker will handle it.
+ // FIXME: Should we handle it as an optimization?
+
+ // If the symbol is out of range, produce a relocation and hope the
+ // linker can handle it. GNU AS produces an error in this case.
+ if (Sym->isExternal() || Value >= 0x400004)
+ IsResolved = false;
}
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
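
The fixup_arm_thumb_bl handling inverts the old approach: rather than resolving only same-fragment jumps, it starts from the assembler's own resolution and backs off for external or out-of-range targets. As a standalone predicate (0x400004 is the diff's own bound, roughly the 4 MiB Thumb BL range plus the PC bias):

    // Model of the new rule for an already-resolved Thumb BL fixup.
    static bool staysResolved(bool symIsExternal, uint64_t Value) {
      return !symIsExternal && Value < 0x400004; // else: emit a relocation
    }
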
@@ -616,7 +672,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the instruction. This allows adjustFixupValue() to issue a diagnostic
// if the value is invalid.
(void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
- IsLittleEndian);
+ IsLittleEndian, IsResolved);
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -719,7 +775,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
+ Value =
+ adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
@@ -743,6 +800,249 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
+namespace CU {
+
+/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings {
+ UNWIND_ARM_MODE_MASK = 0x0F000000,
+ UNWIND_ARM_MODE_FRAME = 0x01000000,
+ UNWIND_ARM_MODE_FRAME_D = 0x02000000,
+ UNWIND_ARM_MODE_DWARF = 0x04000000,
+
+ UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000,
+
+ UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
+
+ UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080,
+
+ UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000F00,
+
+ UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF
+};
+
+} // end CU namespace
+
+/// Generate compact unwind encoding for the function based on the CFI
+/// instructions. If the CFI instructions describe a frame that cannot be
+/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which
+/// tells the runtime to fallback and unwind using dwarf.
+uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n");
+ // Only armv7k uses CFI based unwinding.
+ if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K)
+ return 0;
+ // No .cfi directives means no frame.
+ if (Instrs.empty())
+ return 0;
+ // Start off assuming CFA is at SP+0.
+ int CFARegister = ARM::SP;
+ int CFARegisterOffset = 0;
+ // Mark savable registers as initially unsaved
+ DenseMap<unsigned, int> RegOffsets;
+ int FloatRegCount = 0;
+ // Process each .cfi directive and build up compact unwind info.
+ for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
+ int Reg;
+ const MCCFIInstruction &Inst = Instrs[i];
+ switch (Inst.getOperation()) {
+ case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
+ CFARegisterOffset = -Inst.getOffset();
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset
+ CFARegisterOffset = -Inst.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpOffset: // DW_CFA_offset
+ Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RegOffsets[Reg] = Inst.getOffset();
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ RegOffsets[Reg] = Inst.getOffset();
+ ++FloatRegCount;
+ } else {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << ".cfi_offset on unknown register="
+ << Inst.getRegister() << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ break;
+ case MCCFIInstruction::OpRelOffset: // DW_CFA_advance_loc
+ // Ignore
+ break;
+ default:
+ // Directive not convertible to compact unwind, bail out.
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "CFI directive not compatiable with comact "
+ "unwind encoding, opcode=" << Inst.getOperation()
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ break;
+ }
+ }
+
+ // If no frame set up, return no unwind info.
+ if ((CFARegister == ARM::SP) && (CFARegisterOffset == 0))
+ return 0;
+
+ // Verify standard frame (lr/r7) was used.
+ if (CFARegister != ARM::R7) {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is "
+ << CFARegister
+ << " instead of r7\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ int StackAdjust = CFARegisterOffset - 8;
+ if (RegOffsets.lookup(ARM::LR) != (-4 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "LR not saved as standard frame, StackAdjust="
+ << StackAdjust
+ << ", CFARegisterOffset=" << CFARegisterOffset
+ << ", lr save at offset=" << RegOffsets[14] << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ if (RegOffsets.lookup(ARM::R7) != (-8 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "r7 not saved as standard frame\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ uint32_t CompactUnwindEncoding = CU::UNWIND_ARM_MODE_FRAME;
+
+ // If var-args are used, there may be a stack adjust required.
+ switch (StackAdjust) {
+ case 0:
+ break;
+ case 4:
+ CompactUnwindEncoding |= 0x00400000;
+ break;
+ case 8:
+ CompactUnwindEncoding |= 0x00800000;
+ break;
+ case 12:
+ CompactUnwindEncoding |= 0x00C00000;
+ break;
+ default:
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs()
+ << ".cfi_def_cfa stack adjust ("
+ << StackAdjust << ") out of range\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // If r6 is saved, it must be right below r7.
+ static struct {
+ unsigned Reg;
+ unsigned Encoding;
+ } GPRCSRegs[] = {{ARM::R6, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6},
+ {ARM::R5, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5},
+ {ARM::R4, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4},
+ {ARM::R12, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R12},
+ {ARM::R11, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R11},
+ {ARM::R10, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R10},
+ {ARM::R9, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R9},
+ {ARM::R8, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R8}};
+
+ int CurOffset = -8 - StackAdjust;
+ for (auto CSReg : GPRCSRegs) {
+ auto Offset = RegOffsets.find(CSReg.Reg);
+ if (Offset == RegOffsets.end())
+ continue;
+
+ int RegOffset = Offset->second;
+ if (RegOffset != CurOffset - 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << MRI.getName(CSReg.Reg) << " saved at "
+ << RegOffset << " but only supported at "
+ << CurOffset << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CompactUnwindEncoding |= CSReg.Encoding;
+ CurOffset -= 4;
+ }
+
+ // If no floats saved, we are done.
+ if (FloatRegCount == 0)
+ return CompactUnwindEncoding;
+
+ // Switch mode to include D register saving.
+ CompactUnwindEncoding &= ~CU::UNWIND_ARM_MODE_MASK;
+ CompactUnwindEncoding |= CU::UNWIND_ARM_MODE_FRAME_D;
+
+ // FIXME: supporting more than 4 saved D-registers compactly would be trivial,
+ // but needs coordination with the linker and libunwind.
+ if (FloatRegCount > 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "unsupported number of D registers saved ("
+ << FloatRegCount << ")\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // Floating point registers must either be saved sequentially, or we defer to
+ // DWARF. No gaps allowed here so check that each saved d-register is
+ // precisely where it should be.
+ static unsigned FPRCSRegs[] = { ARM::D8, ARM::D10, ARM::D12, ARM::D14 };
+ for (int Idx = FloatRegCount - 1; Idx >= 0; --Idx) {
+ auto Offset = RegOffsets.find(FPRCSRegs[Idx]);
+ if (Offset == RegOffsets.end()) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " not saved\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ } else if (Offset->second != CurOffset - 8) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " saved at " << Offset->second
+ << ", expected at " << CurOffset - 8
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CurOffset -= 8;
+ }
+
+ return CompactUnwindEncoding | ((FloatRegCount - 1) << 8);
+}
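
A worked example (a sketch, not taken from the source): for the standard armv7k prologue "push {r4, r5, r6, r7, lr}; add r7, sp, #12", the CFI describes CFA = r7+8 with lr at -4, r7 at -8 and r6/r5/r4 stacked below, so StackAdjust is 0 and the routine above yields FRAME mode plus the three first-push bits:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t Enc = 0x01000000  /* UNWIND_ARM_MODE_FRAME */
                   | 0x00000001  /* UNWIND_ARM_FRAME_FIRST_PUSH_R4 */
                   | 0x00000002  /* UNWIND_ARM_FRAME_FIRST_PUSH_R5 */
                   | 0x00000004; /* UNWIND_ARM_FRAME_FIRST_PUSH_R6 */
      assert(Enc == 0x01000007);
      return 0;
    }
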
+
+static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
+ unsigned AK = ARM::parseArch(Arch);
+ switch (AK) {
+ default:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV4T:
+ return MachO::CPU_SUBTYPE_ARM_V4T;
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_ARMV5TEJ:
+ return MachO::CPU_SUBTYPE_ARM_V5;
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6K:
+ return MachO::CPU_SUBTYPE_ARM_V6;
+ case ARM::AK_ARMV7A:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV7S:
+ return MachO::CPU_SUBTYPE_ARM_V7S;
+ case ARM::AK_ARMV7K:
+ return MachO::CPU_SUBTYPE_ARM_V7K;
+ case ARM::AK_ARMV6M:
+ return MachO::CPU_SUBTYPE_ARM_V6M;
+ case ARM::AK_ARMV7M:
+ return MachO::CPU_SUBTYPE_ARM_V7M;
+ case ARM::AK_ARMV7EM:
+ return MachO::CPU_SUBTYPE_ARM_V7EM;
+ }
+}
+
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
@@ -751,19 +1051,8 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
default:
llvm_unreachable("unsupported object format");
case Triple::MachO: {
- MachO::CPUSubTypeARM CS =
- StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
- .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
- .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
- .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
- .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
- .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
- .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
- .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
- .Default(MachO::CPU_SUBTYPE_ARM_V7);
-
- return new ARMAsmBackendDarwin(T, TheTriple, CS);
+ MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
+ return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 6b4abd5..28a6213 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -45,6 +45,10 @@ public:
const MCValue &Target, uint64_t &Value,
bool &IsResolved) override;
+ unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
+ MCContext *Ctx, bool IsLittleEndian,
+ bool IsResolved) const;
+
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
@@ -52,6 +56,9 @@ public:
bool mayNeedRelaxation(const MCInst &Inst) const override;
+ const char *reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const;
+
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index a6206e3..995dd0f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -16,11 +16,12 @@ using namespace llvm;
namespace {
class ARMAsmBackendDarwin : public ARMAsmBackend {
+ const MCRegisterInfo &MRI;
public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const Triple &TT,
- MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
+ const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
+ : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
HasDataInCodeSupport = true;
}
@@ -28,6 +29,9 @@ public:
return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
Subtype);
}
+
+ uint32_t generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const override;
};
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 804d353..52eba8be 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -95,7 +95,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_ARM_TLS_IE32;
break;
- case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
}
@@ -192,7 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_ARM_GOTOFF32;
break;
- case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
case MCSymbolRefExpr::VK_ARM_TARGET1:
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index d17fdb9..6084f22 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitAttribute(unsigned Attribute, unsigned Value) override;
void emitTextAttribute(unsigned Attribute, StringRef String) override;
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
- StringRef StrinValue) override;
+ StringRef StringValue) override;
void emitArch(unsigned Arch) override;
void emitArchExtension(unsigned ArchExt) override;
void emitObjectArch(unsigned Arch) override;
@@ -195,16 +195,16 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
OS << "\n";
}
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
- OS << "\t.arch\t" << ARMTargetParser::getArchName(Arch) << "\n";
+ OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
- OS << "\t.arch_extension\t" << ARMTargetParser::getArchExtName(ArchExt) << "\n";
+ OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
- OS << "\t.object_arch\t" << ARMTargetParser::getArchName(Arch) << '\n';
+ OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
- OS << "\t.fpu\t" << ARMTargetParser::getFPUName(FPU) << "\n";
+ OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
void ARMTargetAsmStreamer::finishAttributeSection() {
}
@@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
class ARMTargetELFStreamer : public ARMTargetStreamer {
private:
// This structure holds all attributes, accounting for
- // their string/numeric value, so we can later emmit them
+ // their string/numeric value, so we can later emit them
// in declaration order, keeping all in the same vector
struct AttributeItem {
enum {
@@ -254,7 +254,7 @@ private:
} Type;
unsigned Tag;
unsigned IntValue;
- StringRef StringValue;
+ std::string StringValue;
static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
// The conformance tag must be emitted first when serialised
@@ -507,14 +507,15 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
- if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4))
- getContext().reportFatalError(Loc, "relocated expression must be 32-bit");
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
+ getContext().reportError(Loc, "relocated expression must be 32-bit");
+ return;
+ }
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
void EmitAssemblerFlag(MCAssemblerFlag Flag) override {
@@ -684,16 +685,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
setAttributeItem(CPU_name,
- ARMTargetParser::getCPUAttr(Arch),
+ ARM::getCPUAttr(Arch),
false);
if (EmittedArch == ARM::AK_INVALID)
setAttributeItem(CPU_arch,
- ARMTargetParser::getArchAttr(Arch),
+ ARM::getArchAttr(Arch),
false);
else
setAttributeItem(CPU_arch,
- ARMTargetParser::getArchAttr(EmittedArch),
+ ARM::getArchAttr(EmittedArch),
false);
switch (Arch) {
@@ -702,7 +703,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV3:
case ARM::AK_ARMV3M:
case ARM::AK_ARMV4:
- case ARM::AK_ARMV5:
setAttributeItem(ARM_ISA_use, Allowed, false);
break;
@@ -710,7 +710,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV5T:
case ARM::AK_ARMV5TE:
case ARM::AK_ARMV6:
- case ARM::AK_ARMV6J:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
@@ -721,8 +720,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
case ARM::AK_ARMV6K:
- case ARM::AK_ARMV6Z:
- case ARM::AK_ARMV6ZK:
+ case ARM::AK_ARMV6KZ:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
setAttributeItem(Virtualization_use, AllowTZ, false);
@@ -732,10 +730,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
- case ARM::AK_ARMV7:
- setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
- break;
-
case ARM::AK_ARMV7A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
@@ -755,6 +749,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
+ case ARM::AK_ARMV8_2A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1084,19 +1079,14 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
- SwitchToEHSection(".ARM.extab",
- ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getDataRel(),
- FnStart);
+ SwitchToEHSection(".ARM.extab", ELF::SHT_PROGBITS, ELF::SHF_ALLOC,
+ SectionKind::getData(), FnStart);
}
inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
- SwitchToEHSection(".ARM.exidx",
- ELF::SHT_ARM_EXIDX,
+ SwitchToEHSection(".ARM.exidx", ELF::SHT_ARM_EXIDX,
ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
- SectionKind::getDataRel(),
- FnStart);
+ SectionKind::getData(), FnStart);
}
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1ac0815..bda37f6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -33,7 +33,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::SjLj;
+ ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS()
+ ? ExceptionHandling::SjLj
+ : ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 99a5fff..5e54816 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -19,34 +19,37 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
-
- class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
- };
-
- class ARMELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit ARMELFMCAsmInfo(const Triple &TT);
-
- void setUseIntegratedAssembler(bool Value) override;
- };
-
- class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor() override;
- public:
- explicit ARMCOFFMCAsmInfoMicrosoft();
- };
-
- class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
- void anchor() override;
- public:
- explicit ARMCOFFMCAsmInfoGNU();
- };
+class Triple;
+
+class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
+};
+
+class ARMELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit ARMELFMCAsmInfo(const Triple &TT);
+
+ void setUseIntegratedAssembler(bool Value) override;
+};
+
+class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoMicrosoft();
+};
+
+class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoGNU();
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 9146d4d..75dde80 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -63,8 +63,8 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS ARMMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 21c9fc1..8c8c249 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -134,101 +135,11 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
bool isThumb =
TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb;
- bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
- switch (TT.getSubArch()) {
- default:
- llvm_unreachable("invalid sub-architecture for ARM");
- case Triple::ARMSubArch_v8:
- if (NoCPU)
- // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC
- ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8";
- break;
- case Triple::ARMSubArch_v8_1a:
- if (NoCPU)
- // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
- ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8.1a";
- break;
- case Triple::ARMSubArch_v7m:
- isThumb = true;
- if (NoCPU)
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7em:
- if (NoCPU)
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7s:
- if (NoCPU)
- // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS
- // Swift
- ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7:
- // v7 CPUs have lots of different feature sets. If no CPU is specified,
- // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
- // the "minimum" feature set and use CPU string to figure out the exact
- // features.
- if (NoCPU)
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v6t2:
- ARMArchFeature = "+v6t2";
- break;
- case Triple::ARMSubArch_v6k:
- ARMArchFeature = "+v6k";
- break;
- case Triple::ARMSubArch_v6m:
- isThumb = true;
- if (NoCPU)
- // v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6m,+noarm,+mclass";
- else
- ARMArchFeature = "+v6";
- break;
- case Triple::ARMSubArch_v6:
- ARMArchFeature = "+v6";
- break;
- case Triple::ARMSubArch_v5te:
- ARMArchFeature = "+v5te";
- break;
- case Triple::ARMSubArch_v5:
- ARMArchFeature = "+v5t";
- break;
- case Triple::ARMSubArch_v4t:
- ARMArchFeature = "+v4t";
- break;
- case Triple::NoSubArch:
- break;
- }
+
+ unsigned ArchID = ARM::parseArch(TT.getArchName());
+ if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic"))
+ ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str();
if (isThumb) {
if (ARMArchFeature.empty())
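
Roughly a hundred lines of hand-maintained per-subarch feature strings collapse into a TargetParser lookup, with the detailed feature lists presumably carried by the "+armv..." subtarget features themselves now. A sketch of what the new path computes for one triple, under that assumption:

    // Sketch (TargetParser behavior assumed, not verified here):
    // for "thumbv7em-none-eabi" with an empty or "generic" CPU,
    unsigned ArchID = ARM::parseArch("thumbv7em"); // -> ARM::AK_ARMV7EM
    std::string Feature =
        ("+" + ARM::getArchName(ArchID)).str();    // -> "+armv7em"
    // the isThumb path then adds the Thumb-mode features as before.
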
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index fd30623..c2bbc8e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -86,7 +86,8 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T,
// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll);
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
/// Construct an ELF Mach-O object writer.
MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 95d7ea7..cfd504e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -150,10 +150,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint32_t Value2 = 0;
@@ -163,10 +165,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_HALF_SECTDIFF;
@@ -251,10 +255,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
@@ -265,10 +271,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_SECTDIFF;
@@ -346,13 +354,15 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
unsigned RelocType = MachO::ARM_RELOC_VANILLA;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
// If we failed to get fixup kind info, it's because there's no legal
// relocation type for the fixup kind. This happens when it's a fixup that's
// expected to always be resolvable at assembly time and not have any
// relocations needed.
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation on symbol");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
+ return;
+ }
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index b680db5..dad50f2 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -27,8 +27,8 @@ ARMTargetStreamer::~ARMTargetStreamer() {}
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
-const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
- return ConstantPools->addEntry(Streamer, Expr, 4);
+const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc) {
+ return ConstantPools->addEntry(Streamer, Expr, 4, Loc);
}
void ARMTargetStreamer::emitCurrentConstantPool() {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b993b1b..83fa084 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,11 +37,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll) {
- return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+MCStreamer *llvm::createARMWinCOFFStreamer(
+ MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) {
+ auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
}
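
The widened factory signature exists so the IncrementalLinkerCompatible bit reaches the MCAssembler before any data is emitted; the assembler then avoids output that an incremental linker could not consume. A sketch of a caller, with the surrounding registration plumbing assumed:

// Hypothetical wrapper; only the createARMWinCOFFStreamer call and its
// parameter order come from the diff above.
MCStreamer *makeWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &MAB,
                                raw_pwrite_stream &OS, MCCodeEmitter *CE) {
  bool RelaxAll = false;                   // not used by the COFF streamer
  bool IncrementalLinkerCompatible = true; // e.g. targeting link.exe
  return createARMWinCOFFStreamer(Ctx, MAB, OS, CE, RelaxAll,
                                  IncrementalLinkerCompatible);
}
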
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 3b4358b..93e0ac4 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -13,6 +13,7 @@
#include "Thumb1FrameLowering.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -84,7 +85,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -100,7 +100,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
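
The new comment encodes a debugger convention worth spelling out: the first instruction that carries a real source location marks the end of the prologue, so every frame-setup instruction must be built with an unknown location. In the same spirit as the hunk (simplified, reusing the names in scope above):

DebugLoc dl; // deliberately empty; a real location here would make the
             // debugger treat the prologue as already finished
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPUSH)))
    .addReg(ARM::R7, RegState::Kill)
    .addReg(ARM::LR, RegState::Kill)
    .setMIFlags(MachineInstr::FrameSetup);
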
@@ -168,8 +172,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
++MBBI;
- if (MBBI != MBB.end())
- dl = MBBI->getDebugLoc();
}
// Determine starting offsets of spill areas.
@@ -232,11 +234,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
}
-
// Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
- FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI)
- + GPRCS1Size + ArgRegsSaveSize;
+ FramePtrOffsetInBlock +=
+ MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
@@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert((MBBI->getOpcode() == ARM::tBX_RET ||
- MBBI->getOpcode() == ARM::tPOP_RET) &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
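
With shrink-wrapping, an epilogue block no longer has to end in a return, or in any terminator at all, so both the insertion point and the DebugLoc must tolerate an empty position. The guarded form above generalizes to a small helper (hypothetical, for illustration only):

static std::pair<MachineBasicBlock::iterator, DebugLoc>
epilogueInsertionPoint(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
  DebugLoc DL = (I != MBB.end()) ? I->getDebugLoc() : DebugLoc();
  return std::make_pair(I, DL); // safe even for fall-through blocks
}
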
@@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
} else {
- if (MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI &&
- std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
@@ -388,66 +385,189 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- bool IsV4PopReturn = false;
- for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo())
+ if (needPopSpecialFixUp(MF)) {
+ bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
+ (void)Done;
+ assert(Done && "Emission of the special fixup failed!?");
+ }
+}
+
+bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ if (!needPopSpecialFixUp(*MBB.getParent()))
+ return true;
+
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+ return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
+}
+
+bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
+ ARMFunctionInfo *AFI =
+ const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
+ if (AFI->getArgRegsSaveSize())
+ return true;
+
+ // LR cannot be encoded in a Thumb1 POP, i.e., restoring it requires a special fix-up.
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
if (CSI.getReg() == ARM::LR)
- IsV4PopReturn = true;
- IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps();
-
- // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
- // to LR, and we can't pop the value directly to the PC since
- // we need to update the SP after popping the value. So instead
- // we have to emit:
- // POP {r3}
- // ADD sp, #offset
- // BX r3
- // If this would clobber a return value, then generate this sequence instead:
- // MOV ip, r3
- // POP {r3}
- // ADD sp, #offset
- // MOV lr, r3
- // MOV r3, ip
- // BX lr
- if (ArgRegsSaveSize || IsV4PopReturn) {
- // Get the last instruction, tBX_RET
- MBBI = MBB.getLastNonDebugInstr();
- assert (MBBI->getOpcode() == ARM::tBX_RET);
- DebugLoc dl = MBBI->getDebugLoc();
-
- if (AFI->getReturnRegsCount() <= 3) {
- // Epilogue: pop saved LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
- .addReg(ARM::R3, RegState::Kill);
- AddDefaultPred(MIB);
- MIB.copyImplicitOps(&*MBBI);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ return true;
+
+ return false;
+}
+
+bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
+ bool DoIt) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
+ // If MBBI is a return instruction, or a tPOP followed by a return
+ // instruction in the successor BB, we may be able to restore LR
+ // directly into the PC.
+ // This is only possible with v5T ops (v4T can't change the Thumb bit via
+ // a POP PC instruction), and only if we do not need to emit any SP update.
+ // Otherwise, we need a temporary register to pop the value
+ // and copy that value into LR.
+ auto MBBI = MBB.getFirstTerminator();
+ bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
+ if (CanRestoreDirectly) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
+ CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET);
+ else {
+ auto MBBI_prev = MBBI;
+ MBBI_prev--;
+ assert(MBBI_prev->getOpcode() == ARM::tPOP);
+ assert(MBB.succ_size() == 1);
+ if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
+ MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
+ else
+ CanRestoreDirectly = false;
+ }
+ }
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+ if (CanRestoreDirectly) {
+ if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
+ return true;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ // Copy implicit ops and popped registers, if any.
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
+ MIB.addOperand(MO);
+ MIB.addReg(ARM::PC, RegState::Define);
+ // Erase the old instruction (tBX_RET or tPOP).
+ MBB.erase(MBBI);
+ return true;
+ }
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ // Look for a temporary register to use.
+ // First, compute the liveness information.
+ LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ // The semantics of pristine registers changed recently: callee-saved
+ // registers that are touched in the function are no longer part of the
+ // pristine set.
+ // Add those callee-saved registers back explicitly.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
+
+ DebugLoc dl = DebugLoc();
+ if (MBBI != MBB.end()) {
+ dl = MBBI->getDebugLoc();
+ auto InstUpToMBBI = MBB.end();
+ while (InstUpToMBBI != MBBI)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before MBBI.
+ UsedRegs.stepBackward(*--InstUpToMBBI);
+ }
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R3, RegState::Define)
- .addReg(ARM::R12, RegState::Kill));
- // Keep the tBX_RET instruction
+ // Look for a register that can be directly used in the POP.
+ unsigned PopReg = 0;
+ // And some temporary register, just in case.
+ unsigned TemporaryReg = 0;
+ BitVector PopFriendly =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
+ // Rebuild the GPRs from the high registers because they are removed
+ // from the GPR reg class for Thumb1.
+ BitVector GPRsNoLRSP =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ GPRsNoLRSP |= PopFriendly;
+ GPRsNoLRSP.reset(ARM::LR);
+ GPRsNoLRSP.reset(ARM::SP);
+ GPRsNoLRSP.reset(ARM::PC);
+ for (int Register = GPRsNoLRSP.find_first(); Register != -1;
+ Register = GPRsNoLRSP.find_next(Register)) {
+ if (!UsedRegs.contains(Register)) {
+ // Remember the first pop-friendly register and exit.
+ if (PopFriendly.test(Register)) {
+ PopReg = Register;
+ TemporaryReg = 0;
+ break;
+ }
+ // Otherwise, remember that this register can serve as a temporary
+ // for preserving a pop-friendly register.
+ TemporaryReg = Register;
}
}
+
+ if (!DoIt && !PopReg && !TemporaryReg)
+ return false;
+
+ assert((PopReg || TemporaryReg) && "Cannot get LR");
+
+ if (TemporaryReg) {
+ assert(!PopReg && "Unnecessary MOV is about to be inserted");
+ PopReg = PopFriendly.find_first();
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
+ // We couldn't use the direct restoration above, so
+ // perform the opposite conversion: tPOP_RET to tPOP.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ bool Popped = false;
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
+ MO.getReg() != ARM::PC) {
+ MIB.addOperand(MO);
+ if (!MO.isImplicit())
+ Popped = true;
+ }
+ // Is there anything left to pop?
+ if (!Popped)
+ MBB.erase(MIB.getInstr());
+ // Erase the old instruction.
+ MBB.erase(MBBI);
+ MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ }
+
+ assert(PopReg && "Do not know how to get LR");
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(PopReg, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+
+ if (TemporaryReg)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill));
+
+ return true;
}
bool Thumb1FrameLowering::
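
The register scan inside emitPopSpecialFixUp is the reusable piece of the hunk above: seed LivePhysRegs with the block's live-outs plus the callee-saved registers, step backwards to the insertion point, and take the first candidate that is not live. Reduced to its core (a sketch; the real code additionally prefers pop-friendly low registers and falls back to a high temporary):

// Find a register from Candidates that is free just before Pos, or -1.
static int findFreeRegBefore(const MachineBasicBlock &MBB,
                             MachineBasicBlock::const_iterator Pos,
                             const TargetRegisterInfo *TRI,
                             const BitVector &Candidates) {
  LivePhysRegs Live(TRI);
  Live.addLiveOuts(&MBB, /*AddPristines*/ true);
  for (auto I = MBB.end(); I != Pos;)
    Live.stepBackward(*--I); // pre-decrement: liveness right before Pos
  for (int R = Candidates.find_first(); R != -1; R = Candidates.find_next(R))
    if (!Live.contains(R))
      return R;
  return -1; // nothing free; caller must give up or spill
}
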
@@ -461,8 +581,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
const TargetInstrInfo &TII = *STI.getInstrInfo();
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -501,31 +619,38 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *STI.getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
- bool NumRegs = false;
+ bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (STI.hasV4TOps() && !STI.hasV5TOps())
+ if (MBB.succ_empty()) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!STI.hasV5TOps())
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(&*MI);
+ MI = MBB.erase(MI);
+ } else
+ // LR may only be popped into PC, as part of a return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
continue;
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB.copyImplicitOps(&*MI);
- MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
- NumRegs = true;
+ NeedsPop = true;
}
// It's illegal to emit a pop instruction without operands.
- if (NumRegs)
+ if (NeedsPop)
MBB.insert(MI, &*MIB);
else
MF.DeleteMachineInstr(MIB);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 31d5732..812f983 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,42 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+
+ /// Check whether or not the given \p MBB can be used as an epilogue
+ /// for the target.
+ /// The epilogue will be inserted before the first terminator of that block.
+ /// This method is used by the shrink-wrapping pass to decide if
+ /// \p MBB will be correctly handled by the target.
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
+private:
+ /// Check if the frame lowering of \p MF needs a special fixup
+ /// code sequence for the epilogue.
+ /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ /// LR directly, and we can't pop the value straight into the PC when
+ /// we need to update the SP after popping the value. So instead
+ /// we have to emit:
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// BX r3
+ /// If this would clobber a return value, then generate this sequence instead:
+ /// MOV ip, r3
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// MOV lr, r3
+ /// MOV r3, ip
+ /// BX lr
+ bool needPopSpecialFixUp(const MachineFunction &MF) const;
+
+ /// Emit the special fixup code sequence for the epilogue.
+ /// \see needPopSpecialFixUp for more details.
+ /// \p DoIt tells this method whether or not to actually insert
+ /// the code sequence in \p MBB; i.e., when \p DoIt is false,
+ /// \p MBB is left untouched.
+ /// \returns For \p DoIt == true: true when the emission succeeded,
+ /// false otherwise. For \p DoIt == false: true when the emission
+ /// would have been possible, false otherwise.
+ bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const;
};
} // End llvm namespace
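
The DoIt parameter is what makes canUseAsEpilogue cheap and trustworthy: the feasibility probe and the emitter share one body, so they can never disagree about whether a block works. The pattern in miniature, with findScratch and emitFixUp as hypothetical stand-ins for the liveness scan and the instruction building above:

unsigned findScratch(MachineBasicBlock &MBB);       // query, no side effects
void emitFixUp(MachineBasicBlock &MBB, unsigned R); // mutates MBB

bool emitOrProbe(MachineBasicBlock &MBB, bool DoIt) {
  unsigned Scratch = findScratch(MBB);
  if (!Scratch)
    return false; // probe and emit modes agree on failure
  if (!DoIt)
    return true;  // probe mode: feasible, but leave MBB untouched
  emitFixUp(MBB, Scratch);
  return true;
}
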
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 216e776..530e1d3 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -84,11 +84,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
@@ -112,11 +110,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
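
The same MachinePointerInfo::getFixedStack(MF, FI) rewrite recurs in four hunks across Thumb1InstrInfo and Thumb2InstrInfo; the extra MachineFunction argument reflects fixed-stack pseudo source values now being managed per function instead of in a global table. The repeated construction, factored into a hypothetical helper:

// Illustrative only; Flags is MachineMemOperand::MOLoad or MOStore.
static MachineMemOperand *spillSlotMMO(MachineFunction &MF, int FI,
                                       unsigned Flags) {
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                                 Flags, MFI.getObjectSize(FI),
                                 MFI.getObjectAlignment(FI));
}
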
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 68736bc..bf0498d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -256,8 +256,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
// Finalize the bundle.
- MachineBasicBlock::instr_iterator LI = LastITMI;
- finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI));
+ finalizeBundle(MBB, InsertPos.getInstrIterator(),
+ ++LastITMI->getIterator());
Modified = true;
++NumITs;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index dc74f4e..4da769f 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -131,11 +131,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
@@ -171,11 +169,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d9ab824..bcd0e57 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -125,7 +125,10 @@ namespace {
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
+ // Converting to tSTMIA_UPD changes the semantics, so it can only be done
+ // if the base register is killed. This difference is correctly handled
+ // elsewhere.
+ { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
@@ -210,12 +213,12 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
- assert(false && "Duplicated entries?");
+ llvm_unreachable("Duplicated entries?");
}
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
+ for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -435,6 +438,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
+ case ARM::t2STMIA: {
+ // If the base register is killed, we don't care what its value is after the
+ // instruction, so we can use an updating STMIA.
+ if (!MI->getOperand(0).isKill())
+ return false;
+
+ break;
+ }
case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
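
The kill requirement added above has a concrete reading: the wide "stm.w r0, {r1, r2}" leaves the base register unchanged, while the only 16-bit encoding, "stm r0!, {r1, r2}", writes the advanced address back into r0. The two agree exactly when nothing later reads the base, which is what the kill flag asserts. As a predicate (a sketch; the hunk inlines this check):

static bool canNarrowSTMIA(const MachineInstr &MI) {
  // Base register operand 0 must be dead after the store, because the
  // narrow tSTMIA_UPD clobbers it with the writeback address.
  return MI.getOperand(0).isKill();
}
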
@@ -492,6 +503,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
+
+ // tSTMIA_UPD takes a defining register operand. We've already checked that
+ // the register is killed, so mark it as dead here.
+ if (Entry.WideOpc == ARM::t2STMIA)
+ MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
+
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -633,10 +650,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
- if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
- STI->avoidMOVsShifterOperand())
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
// Don't issue movs with shifter operand for some CPUs unless we
- // are optimizing / minimizing for size.
+ // are optimizing for size.
return false;
unsigned Reg0 = MI->getOperand(0).getReg();
@@ -660,11 +676,13 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
}
} else if (Reg0 != Reg1) {
// Try to commute the operands to make it a 2-address instruction.
- unsigned CommOpIdx1, CommOpIdx2;
+ unsigned CommOpIdx1 = 1;
+ unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
- CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ MI->getOperand(CommOpIdx2).getReg() != Reg0)
return false;
- MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ MachineInstr *CommutedMI =
+ TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2);
if (!CommutedMI)
return false;
}
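
The updated commute API lets a caller pin one operand index and leave the other to the target: CommuteAnyOperandIndex asks findCommutedOpIndices to fill in a legal partner, and the discovered pair is then passed to commuteInstruction explicitly. Distilled from the hunk (same calls, minimal context; MI, TII, and Reg0 as in the surrounding function):

unsigned Idx1 = 1; // pin the first commutable operand
unsigned Idx2 = TargetInstrInfo::CommuteAnyOperandIndex;
if (TII->findCommutedOpIndices(MI, Idx1, Idx2) &&
    MI->getOperand(Idx2).getReg() == Reg0)
  // Commute exactly the discovered pair; false means modify MI in place
  // rather than building a new MachineInstr.
  TII->commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2);
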
@@ -750,10 +768,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
- if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
- STI->avoidMOVsShifterOperand())
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
// Don't issue movs with shifter operand for some CPUs unless we
- // are optimizing / minimizing for size.
+ // are optimizing for size.
return false;
unsigned Limit = ~0U;
@@ -1012,9 +1029,9 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
- // Optimizing / minimizing size?
- OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
- MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ // Optimizing / minimizing size? Minimizing size implies optimizing for size.
+ OptimizeSize = MF.getFunction()->optForSize();
+ MinimizeSize = MF.getFunction()->optForMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
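
The last hunk swaps raw attribute checks for the Function helpers, which encode the subsumption directly: optForMinSize() tests MinSize, and optForSize() is also true for MinSize functions, so "minimizing implies optimizing" no longer needs restating at each use. Roughly, in terms of the old attribute queries (a sketch of the equivalence, not the library code):

bool MinSize = F->hasFnAttribute(Attribute::MinSize);
bool OptSize = F->hasFnAttribute(Attribute::OptimizeForSize) || MinSize;
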