summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h63
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td215
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAddressingModes.h585
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp512
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp1556
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h112
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInfo.h249
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp2323
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h527
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp1227
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h205
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h131
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h160
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td161
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp1878
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp1900
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h122
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1242
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp1901
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFixupKinds.h97
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp1021
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h74
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp217
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp121
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp2882
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp6984
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h488
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td1793
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp61
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td3909
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td4838
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1550
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td3432
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1129
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.h183
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp1838
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.cpp68
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp1230
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.h73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp115
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h250
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp40
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td559
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRelocations.h62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td261
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td1034
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td1824
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td294
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp134
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h42
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp252
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h234
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp202
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h143
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp46
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h38
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp152
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp1866
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp548
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h99
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp3245
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h262
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h2239
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp711
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h111
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp321
-rw-r--r--contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp141
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp352
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp111
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h59
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp695
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h63
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp255
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp616
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h78
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp61
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h42
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp770
92 files changed, 72908 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..4679f74
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -0,0 +1,63 @@
+//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "ARMBaseInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMBaseTargetMachine;
+class FunctionPass;
+class JITCodeEmitter;
+class formatted_raw_ostream;
+class MCCodeEmitter;
+class TargetAsmBackend;
+class MachineInstr;
+class ARMAsmPrinter;
+class MCInst;
+
+MCCodeEmitter *createARMMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
+
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+extern Target TheARMTarget, TheThumbTarget;
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..bf4315f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -0,0 +1,215 @@
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
+ "Enable VFP3 instructions">;
+def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
+ "Enable NEON instructions">;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
+ "Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision floating point">;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict VFP3 to 16 double registers">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
+
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
+// to just not use them.
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
+
+// ARM architectures.
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
+ "ARM v6m",
+ [FeatureNoARM, FeatureDB]>;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+ "ARM v6t2",
+ [FeatureThumb2]>;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A",
+ [FeatureThumb2, FeatureNEON, FeatureDB]>;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M",
+ [FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+include "ARMSchedule.td"
+
+// ARM processor families.
+def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
+ "One of the other ARM processor families">;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureHasSlowFPVMLx, FeatureT2XtPk,
+ FeatureFP16]>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
+
+// V4 Processors.
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
+
+// V4T Processors.
+def : ProcNoItin<"arm7tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>;
+def : ProcNoItin<"arm710t", [ArchV4T]>;
+def : ProcNoItin<"arm720t", [ArchV4T]>;
+def : ProcNoItin<"arm9", [ArchV4T]>;
+def : ProcNoItin<"arm9tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm920", [ArchV4T]>;
+def : ProcNoItin<"arm920t", [ArchV4T]>;
+def : ProcNoItin<"arm922t", [ArchV4T]>;
+def : ProcNoItin<"arm940t", [ArchV4T]>;
+def : ProcNoItin<"ep9312", [ArchV4T]>;
+
+// V5T Processors.
+def : ProcNoItin<"arm10tdmi", [ArchV5T]>;
+def : ProcNoItin<"arm1020t", [ArchV5T]>;
+
+// V5TE Processors.
+def : ProcNoItin<"arm9e", [ArchV5TE]>;
+def : ProcNoItin<"arm926ej-s", [ArchV5TE]>;
+def : ProcNoItin<"arm946e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm966e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm968e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm10e", [ArchV5TE]>;
+def : ProcNoItin<"arm1020e", [ArchV5TE]>;
+def : ProcNoItin<"arm1022e", [ArchV5TE]>;
+def : ProcNoItin<"xscale", [ArchV5TE]>;
+def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
+
+// V6 Processors.
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+
+// V6T2 Processors.
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V7 Processors.
+def : Processor<"cortex-a8", CortexA8Itineraries,
+ [ArchV7A, ProcA8]>;
+def : Processor<"cortex-a9", CortexA9Itineraries,
+ [ArchV7A, ProcA9]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [ArchV7M]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo;
+
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+
+ let AssemblyWriters = [ARMAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 0000000..19fbf05
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,585 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDValue N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into an shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting a 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shiftd value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+
+ /// getT2SOImmValSplat - Return the 12-bit encoded representation
+ /// if the specified value can be obtained by splatting the low 8 bits
+ /// into every other byte or every byte of a 32-bit value. i.e.,
+ /// 00000000 00000000 00000000 abcdefgh control = 0
+ /// 00000000 abcdefgh 00000000 abcdefgh control = 1
+ /// abcdefgh 00000000 abcdefgh 00000000 control = 2
+ /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
+ /// Return -1 if none of the above apply.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValSplatVal(unsigned V) {
+ unsigned u, Vs, Imm;
+ // control = 0
+ if ((V & 0xffffff00) == 0)
+ return V;
+
+ // If the value is zeroes in the first byte, just shift those off
+ Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+ // Any passing value only has 8 bits of payload, splatted across the word
+ Imm = Vs & 0xff;
+ // Likewise, any passing values have the payload splatted into the 3rd byte
+ u = Imm | (Imm << 16);
+
+ // control = 1 or 2
+ if (Vs == u)
+ return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+ // control = 3
+ if (Vs == (u | (u << 8)))
+ return (3 << 8) | Imm;
+
+ return -1;
+ }
+
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
+ /// specified value is a rotated 8-bit value. Return -1 if no rotation
+ /// encoding is possible.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValRotateVal(unsigned V) {
+ unsigned RotAmt = CountLeadingZeros_32(V);
+ if (RotAmt >= 24)
+ return -1;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ if ((rotr32(0xff000000U, RotAmt) & V) == V)
+ return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+ return -1;
+ }
+
+ /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// encoding for it. If not, return -1.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmVal(unsigned Arg) {
+ // If 'Arg' is an 8-bit splat, then get the encoded value.
+ int Splat = getT2SOImmValSplatVal(Arg);
+ if (Splat != -1)
+ return Splat;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ int Rot = getT2SOImmValRotateVal(Arg);
+ if (Rot != -1)
+ return Rot;
+
+ return -1;
+ }
+
+ static inline unsigned getT2SOImmValRotate(unsigned V) {
+ if ((V & ~255U) == 0) return 0;
+ // Use CTZ to compute the rotate amount.
+ unsigned RotAmt = CountTrailingZeros_32(V);
+ return (32 - RotAmt) & 31;
+ }
+
+ static inline bool isT2SOImmTwoPartVal (unsigned Imm) {
+ unsigned V = Imm;
+ // Passing values can be any combination of splat values and shifter
+ // values. If this can be handled with a single shifter or splat, bail
+ // out. Those should be handled directly, not with a two-part val.
+ if (getT2SOImmValSplatVal(V) != -1)
+ return false;
+ V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Likewise, try masking out a splat value first.
+ V = Imm;
+ if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
+ V &= ~0xff00ff00U;
+ else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
+ V &= ~0x00ff00ffU;
+ // If what's left can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Otherwise, do not accept.
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immedate cannot be encoded as two part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13);
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)(AM2Opc >> 13);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
+
+ AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
new file mode 100644
index 0000000..ec23449
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
@@ -0,0 +1,512 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+};
+
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public TargetAsmBackend {
+ bool isThumbMode; // Currently emitting Thumb code.
+public:
+ ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+
+ unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
+{ "fixup_arm_movt_hi16", 0, 20, 0 },
+{ "fixup_arm_movw_lo16", 0, 20, 0 },
+{ "fixup_t2_movt_hi16", 0, 20, 0 },
+{ "fixup_t2_movw_lo16", 0, 20, 0 },
+{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+ }
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME: Thumb targets, different move constant targets..
+ return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+ return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if (isThumb()) {
+ // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
+ // use 0x46c0 (which is a 'mov r8, r8' insn).
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write16(0xbf00);
+ if (Count & 1)
+ OW->Write8(0);
+ return true;
+ }
+ // ARM mode
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xe1a00000);
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ }
+
+ return true;
+}
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ assert(ARM_AM::getSOImmVal(Value) != -1 &&
+ "Out of range pc-relative fixup value!");
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 14;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ uint32_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0x3fffff & ((Value - 4) >> 1);
+ Binary = (Value & 0x7ff) << 16; // Low imm11 value.
+ Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
+ // positions in the destination opcode. x = unchanged, I = immediate value
+ // bit, S = sign extension bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0xfffff & ((Value - 2) >> 2);
+ Binary = (Value & 0x3ff) << 17; // Low imm10L value.
+ Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_cp:
+ // Offset by 4, and don't encode the low two bits. Two bytes of that
+ // 'off by 4' is implicitly handled by the half-word ordering of the
+ // Thumb encoding, so we only need to adjust by 2 here.
+ return ((Value - 2) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10) {
+ uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ }
+}
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
+ : ARMAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ true);
+ }
+};
+
+// FIXME: Raise this to share code between Darwin and ELF.
+void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = 4; // FIXME: 2 for Thumb
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset % NumBytes == 0 && "Offset mod NumBytes is nonzero!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ // FIXME: Subtarget info should be derived. Force v7 for now.
+ return createMachObjectWriter(new ARMMachObjectWriter(
+ /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ object::mach::CSARM_V7),
+ OS,
+ /*IsLittleEndian=*/true);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ return 4;
+ }
+}
+
+void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const std::string &TT) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ return new DarwinARMAsmBackend(T);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ assert(0 && "Windows not supported on ARM");
+ default:
+ return new ELFARMAsmBackend(T, Triple(TT).getOS());
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 0000000..db12b8e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1556 @@
+//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMAddressingModes.h"
+#include "ARMBuildAttrs.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ // Per section and per symbol attributes are not supported.
+ // To implement them we would need the ability to delay this emission
+ // until the assembly file is fully parsed/generated as only then do we
+ // know the symbol and section numbers.
+ class AttributeEmitter {
+ public:
+ virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+ virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+ virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+ virtual void Finish() = 0;
+ virtual ~AttributeEmitter() {}
+ };
+
+ class AsmAttributeEmitter : public AttributeEmitter {
+ MCStreamer &Streamer;
+
+ public:
+ AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+ void MaybeSwitchVendor(StringRef Vendor) { }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ Streamer.EmitRawText("\t.eabi_attribute " +
+ Twine(Attribute) + ", " + Twine(Value));
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ switch (Attribute) {
+ case ARMBuildAttrs::CPU_name:
+ Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ break;
+ default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
+ }
+ }
+ void Finish() { }
+ };
+
+ class ObjectAttributeEmitter : public AttributeEmitter {
+ MCObjectStreamer &Streamer;
+ StringRef CurrentVendor;
+ SmallString<64> Contents;
+
+ public:
+ ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+ Streamer(Streamer_), CurrentVendor("") { }
+
+ void MaybeSwitchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor.empty())
+ CurrentVendor = Vendor;
+ else if (CurrentVendor == Vendor)
+ return;
+ else
+ Finish();
+
+ CurrentVendor = Vendor;
+
+ assert(Contents.size() == 0);
+ }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ // FIXME: should be ULEB
+ Contents += Attribute;
+ Contents += Value;
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ Contents += Attribute;
+ Contents += UppercaseString(String);
+ Contents += 0;
+ }
+
+ void Finish() {
+ const size_t ContentsSize = Contents.size();
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ Streamer.EmitBytes(Contents, 0);
+
+ Contents.clear();
+ }
+ };
+
+} // end of anonymous namespace
+
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ if (AFI->isThumbFunction()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer.EmitThumbFunc(Subtarget->isTargetDarwin()? CurrentFnSym : 0);
+ }
+
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// runOnMachineFunction - This uses the EmitInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
+ switch (MO.getType()) {
+ default:
+ assert(0 && "<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ O << ARMInstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF == ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF == ARMII::MO_HI16))
+ O << ":upper16:";
+ O << Imm;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
+ O << *Mang->getSymbol(GV);
+
+ printOffset(MO.getOffset(), O);
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ }
+}
+
+//===--------------------------------------------------------------------===//
+
+MCSymbol *ARMAsmPrinter::
+GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *ARMAsmPrinter::
+GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid << '_' << uid2;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+ << getFunctionNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
+ printOperand(MI, OpNum, O);
+ return false;
+ case 'Q':
+ case 'R':
+ case 'H':
+ // These modifiers are not yet supported.
+ return true;
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
+ return false;
+}
+
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
+ // Declare all the text sections up front (before the DWARF sections
+ // emitted by AsmPrinter::doInitialization) so the assembler will keep
+ // them together at the beginning of the object file. This helps
+ // avoid out-of-range branches that are due a fundamental limitation of
+ // the way symbol offsets are encoded with the current Darwin ARM
+ // relocations.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(
+ getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextSection());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+ if (RelocM == Reloc::DynamicNoPIC) {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ } else {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ }
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
+ }
+ }
+
+ // Use unified assembler syntax.
+ OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
+
+ // Emit ARM Build Attributes
+ if (Subtarget->isTargetELF()) {
+
+ emitAttributes();
+ }
+}
+
+
+void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output non-lazy-pointers for external and common global variables.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+ emitARMAttributeSection();
+
+ AttributeEmitter *AttrEmitter;
+ if (OutStreamer.hasRawTextSupport())
+ AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+ else {
+ MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+ AttrEmitter = new ObjectAttributeEmitter(O);
+ }
+
+ AttrEmitter->MaybeSwitchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString == "cortex-a8" ||
+ Subtarget->isCortexA8()) {
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ // Fixme: figure out when this is emitted.
+ //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+ // ARMBuildAttrs::AllowWMMXv1);
+ //
+
+ /// ADD additional Else-cases here!
+ } else if (CPUString == "generic") {
+ // FIXME: Why these defaults?
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ }
+
+ // FIXME: Emit FPU type
+ if (Subtarget->hasVFP2())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2);
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (NoInfsFPMath && NoNaNsFPMath)
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ // FIXME: add more flags to ARMBuildAttrs.h
+ // 8-bytes alignment stuff.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+ }
+ // FIXME: Should we signal R9 usage?
+
+ if (Subtarget->hasDivide())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+ AttrEmitter->Finish();
+ delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (OutStreamer.hasRawTextSupport())
+ return;
+
+ const ARMElfTargetObjectFile &TLOFELF =
+ static_cast<const ARMElfTargetObjectFile &>
+ (getObjFileLowering());
+
+ OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+ // Format version
+ OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+ case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
+ case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
+ }
+ return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ return Mang->getSymbol(GV);
+
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+ MMIMachO.getGVStubEntry(MCSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+ MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ } else if (ACPV->isBlockAddress()) {
+ MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = ACPV->getGV();
+ MCSym = GetARMGVSymbol(GV);
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ ACPV->getLabelId(),
+ OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::CreateAdd(PCRelExpr,
+ MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+ // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+ // label, so just emit a local label end reference that instead.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = 1;
+ if (Opcode == ARM::BR_JTadd)
+ OpNum = 2;
+ else if (Opcode == ARM::BR_JTm)
+ OpNum = 3;
+
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, 4);
+ }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ unsigned OffsetWidth = 4;
+ if (MI->getOpcode() == ARM::t2TBB_JT)
+ OffsetWidth = 1;
+ else if (MI->getOpcode() == ARM::t2TBH_JT)
+ OffsetWidth = 2;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ if (OffsetWidth == 4) {
+ MCInst BrInst;
+ BrInst.setOpcode(ARM::t2B);
+ BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ OutStreamer.EmitInstruction(BrInst);
+ continue;
+ }
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - LJTI_0_0) / 2
+ // .byte (LBB1 - LJTI_0_0) / 2
+ const MCExpr *Expr =
+ MCBinaryExpr::CreateSub(MBBSymbolExpr,
+ MCSymbolRefExpr::Create(JTISymbol, OutContext),
+ OutContext);
+ Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, OffsetWidth);
+ }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+ const MCSymbol *Label,
+ unsigned pred, unsigned ccreg,
+ MCContext &Ctx) {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(Dest));
+ Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ // Add predicate operands.
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+ unsigned Opcode) {
+ MCInst TmpInst;
+
+ // Emit the instruction as usual, just patch the opcode.
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(Opcode);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDrSPi:
+ case ARM::t2ADDrSPi12:
+ case ARM::t2SUBrSPi:
+ case ARM::t2SUBrSPi12:
+ assert ((MI->getOperand(1).getReg() == ARM::SP) &&
+ "Unexpected source register!");
+ break;
+
+ case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case ARM::tBfar: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBL);
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MI->getOperand(0).getMBB()->getSymbol(), OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetCPISymbol(MI->getOperand(1).getIndex()),
+ MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm()),
+ MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVPCRX: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::BXr9_CALL:
+ case ARM::BX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::BMOVPCRXr9_CALL:
+ case ARM::BMOVPCRX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::tADDhirr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::ADDrr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ // Add 's' bit operand (always reg0 for this)
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+ // The LCP0 label is referenced by a constant pool entry in order to get
+ // a PC-relative address at the ldr instruction.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the load
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ MCInst LdStInst;
+ LdStInst.setOpcode(Opcode);
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ LdStInst.addOperand(MCOperand::CreateImm(0));
+ // Add predicate operands.
+ LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ OutStreamer.EmitInstruction(LdStInst);
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
+ /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+ /// in the function. The first operand is the ID# for this instruction, the
+ /// second is the index into the MachineConstantPool that this is, the third
+ /// is the size in bytes of this constant pool entry.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ EmitAlignment(2);
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+
+ return;
+ }
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVgpr2gpr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::t2TBB_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBB);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+ return;
+ }
+ case ARM::t2TBH_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBH);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVgpr2gpr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Make sure the Thumb jump table is 4-byte aligned.
+ if (Opc == ARM::tMOVgpr2gpr)
+ EmitAlignment(2);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDrr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b 1f
+ // movs r0, #1
+ // 1:
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = GetARMSJLJEHLabel();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVgpr2tgpr);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDi3);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateImm(7));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tSTRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tB);
+ TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ OutStreamer.EmitLabel(Label);
+ return;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(2));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVtgpr2gpr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX_RET_vararg);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ // These are the pseudos created to comply with stricter operand restrictions
+ // on ARMv5. Lower them now to "normal" instructions, since all the
+ // restrictions are already satisfied.
+ case ARM::MULv5:
+ EmitPatchedInstruction(MI, ARM::MUL);
+ return;
+ case ARM::MLAv5:
+ EmitPatchedInstruction(MI, ARM::MLA);
+ return;
+ case ARM::SMULLv5:
+ EmitPatchedInstruction(MI, ARM::SMULL);
+ return;
+ case ARM::UMULLv5:
+ EmitPatchedInstruction(MI, ARM::UMULL);
+ return;
+ case ARM::SMLALv5:
+ EmitPatchedInstruction(MI, ARM::SMLAL);
+ return;
+ case ARM::UMLALv5:
+ EmitPatchedInstruction(MI, ARM::UMLAL);
+ return;
+ case ARM::UMAALv5:
+ EmitPatchedInstruction(MI, ARM::UMAAL);
+ return;
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 0000000..5852684
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,112 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ virtual void EmitConstantPool() {} // we emit constant pools customly!
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
+ MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel(void) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 0000000..a56cc1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,249 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only defined enum types, and
+// so are safe to include here.
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ SY = 15,
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case S0: case D0: case Q0: return 0;
+ case R1: case S1: case D1: case Q1: return 1;
+ case R2: case S2: case D2: case Q2: return 2;
+ case R3: case S3: case D3: case Q3: return 3;
+ case R4: case S4: case D4: case Q4: return 4;
+ case R5: case S5: case D5: case Q5: return 5;
+ case R6: case S6: case D6: case Q6: return 6;
+ case R7: case S7: case D7: case Q7: return 7;
+ case R8: case S8: case D8: case Q8: return 8;
+ case R9: case S9: case D9: case Q9: return 9;
+ case R10: case S10: case D10: case Q10: return 10;
+ case R11: case S11: case D11: case Q11: return 11;
+ case R12: case S12: case D12: case Q12: return 12;
+ case SP: case S13: case D13: case Q13: return 13;
+ case LR: case S14: case D14: case Q14: return 14;
+ case PC: case S15: case D15: case Q15: return 15;
+
+ case S16: case D16: return 16;
+ case S17: case D17: return 17;
+ case S18: case D18: return 18;
+ case S19: case D19: return 19;
+ case S20: case D20: return 20;
+ case S21: case D21: return 21;
+ case S22: case D22: return 22;
+ case S23: case D23: return 23;
+ case S24: case D24: return 24;
+ case S25: case D25: return 25;
+ case S26: case D26: return 26;
+ case S27: case D27: return 27;
+ case S28: case D28: return 28;
+ case S29: case D29: return 29;
+ case S30: case D30: return 30;
+ case S31: case D31: return 31;
+ }
+}
+
+namespace ARMII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+ /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+ /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 0000000..2268e59
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,2323 @@
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ unsigned MLxOpc; // MLA / MLS opcode
+ unsigned MulOpc; // Expanded multiplication opcode
+ unsigned AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ bool isLoad = !TID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((isJumpTableBranchOpcode(SecondLastOpc) ||
+ isIndirectBranchOpcode(SecondLastOpc)) &&
+ isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // FIXME: This confuses implicit_def with optional CPSR def.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+LLVM_ATTRIBUTE_NOINLINE
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI);
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ assert(JTI < JT.size());
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ // Basic size info comes from the TSFlags field.
+ const TargetInstrDesc &TID = MI->getDesc();
+ uint64_t TSFlags = TID.TSFlags;
+
+ unsigned Opc = MI->getOpcode();
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: {
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown or unset size field for instr!");
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ }
+ break;
+ }
+ case ARMII::Size8Bytes: return 8; // ARM instruction x 2.
+ case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
+ case ARMII::SizeSpecial: {
+ switch (Opc) {
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; TBH two byte each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = TID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // 4 aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+ return 0; // Not reached
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQ;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
+}
+
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSpill:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64Pseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tRestore:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+}
+
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ if (Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+ // Check if the loaded value, e.g. a constantpool of a global address, are
+ // the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
+ }
+
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ float Probability,
+ float Confidence) const {
+ if (!NumCyles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ float UnpredCost = Probability * NumCyles;
+ UnpredCost += 1.0; // The branch itself
+ UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+ return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ float Probability, float Confidence) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
+ UnpredCost += 1.0; // The branch itself
+ UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+ return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+int llvm::getMatchingCondBranchOpcode(int Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ else if (Opc == ARM::tB)
+ return ARM::tBcc;
+ else if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+ return 0;
+}
+
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ BaseReg = DestReg;
+ }
+}
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immedidate into this ADDri/SUBri
+ // as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold address comp. if opcode has offset bits
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::t2CMPri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
+bool ARMBaseInstrInfo::
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+ if (CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
+ B = MI->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg()) continue;
+
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::ADDri:
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ case ARM::SUBri:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ *UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ break;
+ case ARM::VLDMQIA:
+ case ARM::VLDMQDB:
+ case ARM::VSTMQIA:
+ case ARM::VSTMQDB:
+ return 2;
+
+ // The number of uOps for load / store multiple are determined by the number
+ // registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++UOps;
+ return UOps;
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseTID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefTID.getSchedClass();
+ unsigned UseClass = UseTID.getSchedClass();
+
+ if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction, the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefTID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseTID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction so we can't use DefIdx here. Just use
+ // first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const TargetInstrDesc &DefTID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ if (DefMO.getReg() == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseTID.isBranch())
+ return 0;
+ }
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefTID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+ if (Subtarget.isCortexA9())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Class = TID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ *PredCost = 1;
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VLDMQDB:
+ case ARM::VSTMQIA:
+ case ARM::VSTMQDB:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 0000000..1fb8872
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,527 @@
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINSTRUCTIONINFO_H
+#define ARMBASEINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+
+ AddrModeMask = 0x1f,
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16,
+
+ // Size* - Flags to keep track of the size of an instruction.
+ SizeShift = 5,
+ SizeMask = 7 << SizeShift,
+ SizeSpecial = 1, // 0 byte pseudo or special case.
+ Size8Bytes = 2,
+ Size4Bytes = 3,
+ Size2Bytes = 4,
+
+ // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
+ // and store ops only. Generic "updating" flag is used for ld/st multiple.
+ IndexModeShift = 8,
+ IndexModeMask = 3 << IndexModeShift,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 10,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscelleaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 16,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 17,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 18,
+ DomainMask = 3 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set field while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+}
+
+class ARMBaseInstrInfo : public TargetInstrInfoImpl {
+ const ARMSubtarget &Subtarget;
+
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
+public:
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+ }
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// differences between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+ /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled togther. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles, unsigned ExtraPredCycles,
+ float Prob, float Confidence) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ float Probability, float Confidence) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ float Probability,
+ float Confidence) const {
+ return NumCyles == 1;
+ }
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpMask, int &CmpValue) const;
+
+ /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+private:
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+ /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+ /// addition / subtraction opcode to expand to. Return true for 'HasLane' for
+ /// the MLX instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+ /// will cause stalls when scheduled after (within 4-cycle window) a fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
+};
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ DebugLoc dl);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 0000000..67a4b7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,1227 @@
+//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ TII(tii), STI(sti),
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
+}
+
+const unsigned*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
+ // register.
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // FIXME: avoid re-calculating this everytime.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
+ if (TFI->hasFP(MF))
+ Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+ unsigned Reg) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && TFI->hasFP(MF))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case ARM::ssub_0:
+ case ARM::ssub_1:
+ case ARM::ssub_2:
+ case ARM::ssub_3: {
+ // S sub-registers.
+ if (A->getSize() == 8) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::DPR_8RegClass;
+ assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
+ if (A == &ARM::DPR_8RegClass)
+ return A;
+ return &ARM::DPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 16) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
+ return &ARM::QPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::SPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return &ARM::QQPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::dsub_0:
+ case ARM::dsub_1:
+ case ARM::dsub_2:
+ case ARM::dsub_3: {
+ // D sub-registers.
+ if (A->getSize() == 16) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B != &ARM::DPRRegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+ case ARM::dsub_4:
+ case ARM::dsub_5:
+ case ARM::dsub_6:
+ case ARM::dsub_7: {
+ // D sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+
+ case ARM::qsub_0:
+ case ARM::qsub_1: {
+ // Q sub-registers.
+ if (A->getSize() == 32) {
+ if (B == &ARM::QPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::QPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::qsub_2:
+ case ARM::qsub_3: {
+ // Q sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ }
+ return 0;
+}
+
+bool
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const {
+
+ unsigned Size = RC->getSize() * 8;
+ if (Size < 6)
+ return 0;
+
+ NewSubIdx = 0; // Whole register.
+ unsigned NumRegs = SubIndices.size();
+ if (NumRegs == 8) {
+ // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[0] == ARM::dsub_0 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3 &&
+ SubIndices[4] == ARM::dsub_4 &&
+ SubIndices[5] == ARM::dsub_5 &&
+ SubIndices[6] == ARM::dsub_6 &&
+ SubIndices[7] == ARM::dsub_7);
+ } else if (NumRegs == 4) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 4 Q registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[1] == ARM::qsub_1 &&
+ SubIndices[2] == ARM::qsub_2 &&
+ SubIndices[3] == ARM::qsub_3);
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 4 D registers -> 1 QQ register.
+ if (Size >= 256 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 4 D registers -> 1 QQ register (2nd).
+ if (Size == 512 &&
+ SubIndices[1] == ARM::dsub_5 &&
+ SubIndices[2] == ARM::dsub_6 &&
+ SubIndices[3] == ARM::dsub_7) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 4 S registers -> 1 Q register.
+ if (Size >= 128 &&
+ SubIndices[1] == ARM::ssub_1 &&
+ SubIndices[2] == ARM::ssub_2 &&
+ SubIndices[3] == ARM::ssub_3) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ }
+ } else if (NumRegs == 2) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 2 Q registers -> 1 QQ register.
+ if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::qsub_2) {
+ // 2 Q registers -> 1 QQ register (2nd).
+ if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 2 D registers -> 1 Q register.
+ if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_2) {
+ // 2 D registers -> 1 Q register (2nd).
+ if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+ NewSubIdx = ARM::qsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+ NewSubIdx = ARM::qsub_2;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_6) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+ NewSubIdx = ARM::qsub_3;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 2 S registers -> 1 D register.
+ if (SubIndices[1] == ARM::ssub_1) {
+ if (Size >= 128)
+ NewSubIdx = ARM::dsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_2) {
+ // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+ NewSubIdx = ARM::dsub_1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::GPRRegisterClass;
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven1,
+ GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven4,
+ GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven2,
+ GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven5,
+ GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven3,
+ GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven6,
+ GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd1,
+ GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd4,
+ GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd2,
+ GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd5,
+ GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd3,
+ GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd6,
+ GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+ }
+ }
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of the even / odd register pair and it's now changed
+ // (e.g. coalesced) into a different register. The other register of the
+ // pair allocation hint must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
+}
+
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
+}
+
+unsigned ARMBaseRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return FramePtr;
+ return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1:
+ return ARM::R0;
+ case ARM::R3:
+ return ARM::R2;
+ case ARM::R5:
+ return ARM::R4;
+ case ARM::R7:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
+ case ARM::R9:
+ return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
+ case ARM::R11:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1:
+ return ARM::S0;
+ case ARM::S3:
+ return ARM::S2;
+ case ARM::S5:
+ return ARM::S4;
+ case ARM::S7:
+ return ARM::S6;
+ case ARM::S9:
+ return ARM::S8;
+ case ARM::S11:
+ return ARM::S10;
+ case ARM::S13:
+ return ARM::S12;
+ case ARM::S15:
+ return ARM::S14;
+ case ARM::S17:
+ return ARM::S16;
+ case ARM::S19:
+ return ARM::S18;
+ case ARM::S21:
+ return ARM::S20;
+ case ARM::S23:
+ return ARM::S22;
+ case ARM::S25:
+ return ARM::S24;
+ case ARM::S27:
+ return ARM::S26;
+ case ARM::S29:
+ return ARM::S28;
+ case ARM::S31:
+ return ARM::S30;
+
+ case ARM::D1:
+ return ARM::D0;
+ case ARM::D3:
+ return ARM::D2;
+ case ARM::D5:
+ return ARM::D4;
+ case ARM::D7:
+ return ARM::D6;
+ case ARM::D9:
+ return ARM::D8;
+ case ARM::D11:
+ return ARM::D10;
+ case ARM::D13:
+ return ARM::D12;
+ case ARM::D15:
+ return ARM::D14;
+ case ARM::D17:
+ return ARM::D16;
+ case ARM::D19:
+ return ARM::D18;
+ case ARM::D21:
+ return ARM::D20;
+ case ARM::D23:
+ return ARM::D22;
+ case ARM::D25:
+ return ARM::D24;
+ case ARM::D27:
+ return ARM::D26;
+ case ARM::D29:
+ return ARM::D28;
+ case ARM::D31:
+ return ARM::D30;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0:
+ return ARM::R1;
+ case ARM::R2:
+ return ARM::R3;
+ case ARM::R4:
+ return ARM::R5;
+ case ARM::R6:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
+ case ARM::R8:
+ return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
+ case ARM::R10:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0:
+ return ARM::S1;
+ case ARM::S2:
+ return ARM::S3;
+ case ARM::S4:
+ return ARM::S5;
+ case ARM::S6:
+ return ARM::S7;
+ case ARM::S8:
+ return ARM::S9;
+ case ARM::S10:
+ return ARM::S11;
+ case ARM::S12:
+ return ARM::S13;
+ case ARM::S14:
+ return ARM::S15;
+ case ARM::S16:
+ return ARM::S17;
+ case ARM::S18:
+ return ARM::S19;
+ case ARM::S20:
+ return ARM::S21;
+ case ARM::S22:
+ return ARM::S23;
+ case ARM::S24:
+ return ARM::S25;
+ case ARM::S26:
+ return ARM::S27;
+ case ARM::S28:
+ return ARM::S29;
+ case ARM::S30:
+ return ARM::S31;
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D14:
+ return ARM::D15;
+ case ARM::D16:
+ return ARM::D17;
+ case ARM::D18:
+ return ARM::D19;
+ case ARM::D20:
+ return ARM::D21;
+ case ARM::D22:
+ return ARM::D23;
+ case ARM::D24:
+ return ARM::D25;
+ case ARM::D26:
+ return ARM::D27;
+ case ARM::D28:
+ return ARM::D29;
+ case ARM::D30:
+ return ARM::D31;
+ }
+
+ return 0;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer,
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, Ins, DL, TII.get(ADDriOpc), BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMFrameLowering *TFI =
+ static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned FrameReg;
+
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+ "This code isn't needed if offset already handled!");
+
+ unsigned ScratchReg = 0;
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4/6.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ // Update the original instruction to use the scratch register.
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ if (MI.getOpcode() == ARM::t2ADDrSPi)
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ else if (MI.getOpcode() == ARM::t2SUBrSPi)
+ MI.setDesc(TII.get(ARM::t2SUBri));
+ }
+}
+
+#include "ARMGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 0000000..ba6bd2b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,205 @@
+//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEREGISTERINFO_H
+#define ARMBASEREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
+/// isARMLowRegister - Returns true if the register is low register r0-r7.
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11:
+ // For darwin we want r7 and lr to be next to each other.
+ return !isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11:
+ // Darwin has this second area.
+ return isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ const ARMBaseInstrInfo &TII;
+ const ARMSubtarget &STI;
+
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+ // Can be only subclassed.
+ explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &STI);
+
+ // Return the opcode that implements 'Op', or 0 if no opcode
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getBaseRegister() const { return BasePtr; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) const;
+
+ /// Code Generation virtual methods...
+ virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+private:
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 0000000..69eddf0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,131 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in ARM ABI addenda document (ABI release 2.08).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum SpecialAttr {
+ // This is for the .cpu asm attr. It translates into one or more
+ // AttrType (below) entries in the .ARM.attributes section in the ELF.
+ SEL_CPU
+ };
+
+ enum AttrType {
+ // Rest correspond to ELF/.ARM.attributes
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ MPextension_use = 42, // was 70, 2.08 ABI
+ DIV_use = 44,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use_old = 70
+ };
+
+ // Magic numbers for .ARM.attributes
+ enum AttrMagic {
+ Format_Version = 0x41
+ };
+
+ // Legal Values for CPU_arch, (=6), uleb128
+ enum CPUArch {
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2F_S
+ v6K = 9, // e.g. ARM1136J_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13 // v7_M with DSP extensions
+ };
+
+ enum CPUArchProfile { // (=7), uleb128
+ Not_Applicable = 0, // pre v7, or cross-profile code
+ ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+ RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+ MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+ SystemProfile = (0x53) // 'S' Application or real-time profile
+ };
+
+ // The following have a lot of common use cases
+ enum {
+ //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ Not_Allowed = 0,
+ Allowed = 1,
+
+ // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+ AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_ABI_FP_denormal, (=20), uleb128
+ PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+ // Tag_ABI_FP_number_model, (=23), uleb128
+ AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
+ AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 0000000..ff7db1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 0000000..426ba13
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,161 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match of the original alignment of the arg
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 8>>
+]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 0000000..9bbf6a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1878 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter : public MachineFunctionPass {
+ ARMJITInfo *JTI;
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ const ARMSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+ bool IsThumb;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+ public:
+ ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const ARMInstrInfo *)tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+ void emitDWordLE(uint64_t Binary);
+ void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+ void addPCLabel(unsigned LabelID);
+ void emitPseudoInstruction(const MachineInstr &MI);
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const TargetInstrDesc &TID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const TargetInstrDesc &TID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitSaturateInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the
+ // TableGen'erated getBinaryCodeForInstr() function to encode any
+ // operand values, instead querying getMachineOpValue() directly for
+ // each operand it needs to encode. Thus, any of the new encoder
+ // helper functions can simply return 0 as the values the return
+ // are already handled elsewhere. They are placeholders to allow this
+ // encoder to continue to function until the MC encoder is sufficiently
+ // far along that this one can be eliminated entirely.
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+ const { return 0; }
+ unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getMsbOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const {return 0; }
+ uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+
+ unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+ uint32_t Binary;
+ Binary = Imm12 & 0xfff;
+ if (Imm12 >= 0)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+
+ unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+ return 0;
+ }
+
+ uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
+ }
+
+ uint32_t Binary = Imm12 & 0xfff;
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+ unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(unsigned Imm) const ;
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0) const;
+ };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new ARMCodeEmitter(TM, JCE);
+}
+
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
+ MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+ JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
+ II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
+ TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+ JTI->Initialize(MF, IsPIC);
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return getARMRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV) const {
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ ACPV, MayNeedFarStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), ACPV,
+ MayNeedFarStub);
+ MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of an constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc,
+ intptr_t JTBase) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+void ARMCodeEmitter::emitWordLE(unsigned Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitDWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ llvm_unreachable("Unhandled instruction encoding format!");
+ break;
+ }
+ case ARMII::MiscFrm:
+ if (MI.getOpcode() == ARM::LEApcrelJT) {
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ }
+ llvm_unreachable("Unhandled instruction encoding!");
+ break;
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+
+ assert(ACPV->isGlobalValue() && "unsupported constant pool value");
+ const GlobalValue *GV = ACPV->getGV();
+ if (GV) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ isa<Function>(GV),
+ Subtarget->GVIsIndirectSymbol(GV, RelocM),
+ (intptr_t)ACPV);
+ } else {
+ emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ const Constant *CV = MCPE.Val.ConstVal;
+
+ DEBUG({
+ errs() << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ errs() << F->getName();
+ else
+ errs() << *CV;
+ errs() << '\n';
+ });
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = uint32_t(*CI->getValue().getRawData());
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType()->isFloatTy())
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType()->isDoubleTy())
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ } else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ }
+}
+
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm1, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+ "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V1);
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V2);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::RRX:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
+ DEBUG(errs() << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n');
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::BXr9_CALL:
+ case ARM::BMOVPCRXr9_CALL: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // PICADD is just an add instruction that implicitly read pc.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+
+ case ARM::MOVi32imm:
+ // Two instructions to materialize a constant.
+ if (Subtarget->hasV6T2Ops())
+ emitMOVi32immInstruction(MI);
+ else
+ emitMOVi2piecesInstruction(MI);
+ break;
+
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::RRX:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
+ const TargetInstrDesc &TID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
+ const TargetInstrDesc &TID) const {
+ for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = TID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+
+ if (TID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(TID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
+ // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = TID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+ if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+ MI.getOpcode() == ARM::STRi12) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDR_PRE.
+ if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set bit I(25), because this is not in immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+
+ // If this instr is in scaled register offset/index instruction, set
+ // shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Skip LDRD and STRD's second operand.
+ if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+ ++OpIdx;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDRH_POST.
+ if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+ // If this instr is in register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+ // This instr is in immediate offset/index encoding, set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
+
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
+ for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (TID.getNumDefs() == 2)
+ Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+ // Many multiple instructions (e.g. MLA) have three src operands. Encode
+ // it as Rn (for multiply, that's in the same offset as RdLo.
+ if (TID.getNumOperands() > OpIdx &&
+ !TID.OpInfo[OpIdx].isPredicate() &&
+ !TID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+ if (MI.getOperand(OpIdx).isImm() &&
+ !TID.OpInfo[OpIdx].isPredicate() &&
+ !TID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == TID.getNumOperands() ||
+ TID.OpInfo[OpIdx].isPredicate() ||
+ TID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (TID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ TID.Opcode != ARM::SSAT16 &&
+ TID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (TID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ if (TID.Opcode == ARM::TPsoft) {
+ llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+ }
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+ << '\n');
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Handle jump tables.
+ if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex =
+ (TID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (TID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
+ // The return register is LR.
+ Binary |= getARMRegisterNumbering(ARM::LR);
+ else
+ // otherwise, set the return register
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+ RegD = getARMRegisterNumbering(RegD);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+ RegN = getARMRegisterNumbering(RegN);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+ RegM = getARMRegisterNumbering(RegM);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
+
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+ assert((Binary & ARMII::D_BitShift) == 0 &&
+ (Binary & ARMII::N_BitShift) == 0 &&
+ (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == TID.getNumOperands() ||
+ TID.OpInfo[OpIdx].isPredicate() ||
+ TID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, OpIdx+2);
+
+ // Count the number of registers.
+ unsigned NumRegs = 1;
+ for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
+ // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+ // Otherwise, it will be 0, in the case of 32-bit registers.
+ if(Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = getARMRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = getARMRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = getARMRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const TargetInstrDesc &TID = MI.getDesc();
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000..13d1b33
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1900 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ unsigned MaxDisp;
+ bool NegOk;
+ bool IsSoImm;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
+ const ARMInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
+ void InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+ bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+ bool OptimizeThumb2Instructions(MachineFunction &MF);
+ bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &MF);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &MF) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (!isThumb)
+ return;
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned MBBId = MBB->getNumber();
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
+ }
+ }
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
+ << " size " << BBSizes[J] << "\n");
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
+ MachineConstantPool &MCP = *MF.getConstantPool();
+
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+ HasInlineAsm = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF.RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
+
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(MF, CPEMIs);
+ if (isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(MF, CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+
+ /// Remove dead constant pool entries.
+ MadeChange |= RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= HandleConstantPoolUser(MF, i);
+ if (CPChange && ++NoCPIters > 30)
+ llvm_unreachable("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ llvm_unreachable("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= OptimizeThumb2Instructions(MF);
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(MF);
+
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= UndoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(errs() << '\n'; dumpBBs());
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
+ MF.push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ MF.getConstantPool()->getConstants();
+
+ const TargetData &TD = *MF.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
+ << "\n");
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures.
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' that a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case ARM::tBR_JTr:
+ // A Thumb1 table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
+ MF.EnsureAlignment(2U);
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
+ // FIXME: Add a pseudo ALIGN instruction instead.
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ break;
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is 8 bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // 4 byte wide, this is always correct. We'll check for other
+ // displacements that fits in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+ // In thumb mode, if this block is a constpool island, we may need padding
+ // so it's aligned on 4 byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ ((Offset%4) != 0 || HasInlineAsm))
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4 != 0 || HasInlineAsm))
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || !Succ->begin()->isPHI())
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+
+ int delta = isThumb1 ? 2 : 4;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
+ // All BBOffsets following these blocks must be modified.
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ unsigned TotalAdj = 0;
+ if (isThumb && UserOffset%4 !=0) {
+ UserOffset -= 2;
+ TotalAdj = 2;
+ }
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0) {
+ TrialOffset += 2;
+ TotalAdj += 2;
+ }
+
+ // In Thumb2 mode, later branch adjustments can shift instructions up and
+ // cause alignment change. In the worst case scenario this can cause the
+ // user's effective address to be subtracted by 2 and the CPE's address to
+ // be plus 2.
+ if (isThumb2 && TotalAdj != 4)
+ MaxDisp -= (4 - TotalAdj);
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ }
+ return false;
+}
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U) {
+ unsigned MaxDisp = U.MaxDisp;
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction.
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+}
+
+/// CPEIsInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+
+ if (DoDump) {
+ DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true of the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
+ for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
+ i < e; ++i) {
+ BBOffsets[i] += delta;
+ // If some existing blocks have padding, adjust the padding as needed, a
+ // bit tricky. delta can be negative so don't use % on that.
+ if (!isThumb)
+ continue;
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() && !HasInlineAsm) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned OldOffset = BBOffsets[i] - delta;
+ if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ }
+ }
+ // Thumb1 jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
+ MachineInstr *ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned OldMIOffset = NewMIOffset - delta;
+ if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = llvm::next(MBBI);
+ }
+}
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
+ DEBUG(errs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
+ DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
+
+/// LookForWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ bool FoundWaterThatWouldPad = false;
+ water_iterator IPThatWouldPad;
+ for (water_iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB))) {
+ unsigned WBBId = WaterBB->getNumber();
+ if (isThumb &&
+ (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!FoundWaterThatWouldPad) {
+ FoundWaterThatWouldPad = true;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ WaterIter = IP;
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ if (FoundWaterThatWouldPad) {
+ WaterIter = IPThatWouldPad;
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
+ // inside OffsetIsInRange.
+ if (BBHasFallthrough(UserMBB) &&
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ DEBUG(errs() << "Split at end of block\n");
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb1 ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb1 since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb1). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ BaseInsertOffset -= (isThumb1 ? 2 : 4);
+ EndInsertOffset -= (isThumb1 ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ DEBUG(errs() << "Split in middle of big block\n");
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC.
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (LookForWater(U, UserOffset, IP)) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(errs() << "No water found\n");
+ CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << '\t' << *UserMI);
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ // In thumb1 mode, the size of island may be padded by two to compensate for
+ // the alignment requirement. Then it will now be 2 when the block is
+ // empty, so fix this.
+ // All succeeding offsets have the current size value added in, fix this.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ unsigned MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(MF, Br);
+ return FixUpConditionalBr(MF, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, add an intermediate branch instruction to a branch.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(errs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// LR / restores LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates.
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= OptimizeThumb2Branches(MF);
+ MadeChange |= OptimizeThumb2JumpTables(MF);
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
+ ImmBranch &Br = ImmBranches[i];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc: {
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
+ continue;
+
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ // Check if the distance is within 126. Subtract starting offset by 2
+ // because the cmp will be eliminated.
+ unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible.
+bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = GetOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ // Negative offset is not ok. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (ByteOk || HalfWordOk) {
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ bool BaseRegKill = MI->getOperand(0).isKill();
+ if (!BaseRegKill)
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
+ MachineBasicBlock::iterator PrevI = MI;
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
+ continue;
+
+ MachineInstr *AddrMI = PrevI;
+ bool OptOk = true;
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg) {
+ OptOk = false;
+ break;
+ }
+ if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
+ OptOk = false;
+ break;
+ }
+ }
+ if (!OptOk)
+ continue;
+
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != BaseReg)
+ OptOk = false;
+
+ if (!OptOk)
+ continue;
+
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
+ // is 2-byte aligned. For now, asm printer will fix it up.
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
+ OrigSize += TII->GetInstSizeInBytes(LeaMI);
+ OrigSize += TII->GetInstSizeInBytes(MI);
+
+ AddrMI->eraseFromParent();
+ LeaMI->eraseFromParent();
+ MI->eraseFromParent();
+
+ int delta = OrigSize - NewSize;
+ BBSizes[MBB->getNumber()] -= delta;
+ AdjustBBOffsetsAfter(MBB, -delta);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer if target blocks for the jump table come after the jump
+ // instruction so we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them such that that's true.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 0000000..165a1d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,130 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
+ ARMCP::ARMCPKind K,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)cval->getType()),
+ CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
+ const char *s, unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
+ CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
+ PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
+ ARMCP::ARMCPModifier Modif)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
+ CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
+ Modifier(Modif), AddCurrentAddress(false) {}
+
+const GlobalValue *ARMConstantPoolValue::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->CVal == CVal &&
+ CPV->LabelId == LabelId &&
+ CPV->PCAdjust == PCAdjust &&
+ CPV_streq(CPV->S, S) &&
+ CPV->Modifier == Modifier)
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+ARMConstantPoolValue::~ARMConstantPoolValue() {
+ free((void*)S);
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+}
+
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->CVal == CVal &&
+ ACPV->PCAdjust == PCAdjust &&
+ CPV_streq(ACPV->S, S) &&
+ ACPV->Modifier == Modifier) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
+void ARMConstantPoolValue::dump() const {
+ errs() << " " << *this;
+}
+
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (CVal)
+ O << CVal->getName();
+ else
+ O << S;
+ if (Modifier) O << "(" << getModifierText() << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 0000000..d008811
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,122 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class Constant;
+class BlockAddress;
+class GlobalValue;
+class LLVMContext;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPLSDA
+ };
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT,
+ GOTOFF,
+ GOTTPOFF,
+ TPOFF
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ // 8 for ARM, 4 for Thumb.
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+public:
+ ARMConstantPoolValue(const Constant *cval, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
+ ARMConstantPoolValue();
+ ~ARMConstantPoolValue();
+
+ const GlobalValue *getGV() const;
+ const char *getSymbol() const { return S; }
+ const BlockAddress *getBlockAddress() const;
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+ }
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() { return Kind == ARMCP::CPLSDA; }
+
+ virtual unsigned getRelocationInfo() const { return 2; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value
+ /// can share the same constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
+ void dump() const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
new file mode 100644
index 0000000..51e68b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMELFWriterInfo.h"
+#include "ARMRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the ARMELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+ARMELFWriterInfo::~ARMELFWriterInfo() {}
+
+unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case ARM::reloc_arm_absolute:
+ case ARM::reloc_arm_relative:
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_jt_base:
+ case ARM::reloc_arm_pic_jt:
+ assert(0 && "unsupported ARM relocation type"); break;
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ default:
+ llvm_unreachable("unknown ARM relocation type"); break;
+ }
+ return 0;
+}
+
+long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
+ return 0;
+}
+
+unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
+ return 0;
+}
+
+bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
+ return 1;
+}
+
+unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
+
+long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
new file mode 100644
index 0000000..1c4e532
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_WRITER_INFO_H
+#define ARM_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class ARMELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ ARMELFWriterInfo(TargetMachine &TM);
+ virtual ~ARMELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // ARM_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 0000000..bd753d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,1242 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+using namespace llvm;
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const TargetInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addOperand(MO);
+ else
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructors, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ unsigned PseudoOpc;
+ unsigned RealOpc;
+ bool IsLoad;
+ bool HasWriteBack;
+ NEONRegSpacing RegSpacing;
+ unsigned char NumRegs; // D registers loaded or stored
+ unsigned char RegElts; // elements per D register; used for lane ops
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
+{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
+{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
+
+{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
+{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
+{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
+{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
+{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
+
+{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
+{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
+{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
+
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6,
+ FramePtr, -NumBytes,
+ *TII, RI, MI.getDebugLoc());
+ } else {
+ llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+ // These are just fancy MOVs insructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addReg(0)
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
+ : ARM_AM::asr), 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+ // This encodes as "MOVs Rd, Rm, rrx
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOV_ga_dyn:
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_dyn:
+ case ARM::t2MOV_ga_pcrel: {
+ // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+ bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = isPIC
+ ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+ unsigned HI16TF = isPIC
+ ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+ if (!isPIC) {
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::VMOVQQ: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+ unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+ unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
+ MachineInstrBuilder Even =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VMOVQ))
+ .addReg(EvenDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ MachineInstrBuilder Odd =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VMOVQ))
+ .addReg(OddDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ TransferImpOps(MI, Even, Odd);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLDMQIA:
+ case ARM::VLDMQDB: {
+ unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA:
+ case ARM::VSTMQDB: {
+ unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0).addReg(D1);
+
+ if (SrcIsKill)
+ // Add an implicit kill for the Q register.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VDUPfqf:
+ case ARM::VDUPfdf:{
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
+ // The lane is [0,1] for the containing DReg superregister.
+ // Copy the dst/src register operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addReg(DReg);
+ ++OpIdx;
+ // Add the lane select operand.
+ MIB.addImm(Lane);
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
+
+ case ARM::VST1q8Pseudo:
+ case ARM::VST1q16Pseudo:
+ case ARM::VST1q32Pseudo:
+ case ARM::VST1q64Pseudo:
+ case ARM::VST1q8Pseudo_UPD:
+ case ARM::VST1q16Pseudo_UPD:
+ case ARM::VST1q32Pseudo_UPD:
+ case ARM::VST1q64Pseudo_UPD:
+ case ARM::VST2d8Pseudo:
+ case ARM::VST2d16Pseudo:
+ case ARM::VST2d32Pseudo:
+ case ARM::VST2q8Pseudo:
+ case ARM::VST2q16Pseudo:
+ case ARM::VST2q32Pseudo:
+ case ARM::VST2d8Pseudo_UPD:
+ case ARM::VST2d16Pseudo_UPD:
+ case ARM::VST2d32Pseudo_UPD:
+ case ARM::VST2q8Pseudo_UPD:
+ case ARM::VST2q16Pseudo_UPD:
+ case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST3d8Pseudo:
+ case ARM::VST3d16Pseudo:
+ case ARM::VST3d32Pseudo:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST3d8Pseudo_UPD:
+ case ARM::VST3d16Pseudo_UPD:
+ case ARM::VST3d32Pseudo_UPD:
+ case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST3q8Pseudo_UPD:
+ case ARM::VST3q16Pseudo_UPD:
+ case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
+ case ARM::VST3q8oddPseudo_UPD:
+ case ARM::VST3q16oddPseudo_UPD:
+ case ARM::VST3q32oddPseudo_UPD:
+ case ARM::VST4d8Pseudo:
+ case ARM::VST4d16Pseudo:
+ case ARM::VST4d32Pseudo:
+ case ARM::VST1d64QPseudo:
+ case ARM::VST4d8Pseudo_UPD:
+ case ARM::VST4d16Pseudo_UPD:
+ case ARM::VST4d32Pseudo_UPD:
+ case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST4q8Pseudo_UPD:
+ case ARM::VST4q16Pseudo_UPD:
+ case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
+ case ARM::VST4q8oddPseudo_UPD:
+ case ARM::VST4q16oddPseudo_UPD:
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ }
+
+ return false;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 0000000..9f29530
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,1901 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
+
+namespace {
+
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+ unsigned Scale;
+ unsigned PlusReg;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb;
+ LLVMContext *Context;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, EVT VT);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+ unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+
+ // Call handling routines.
+ private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate?
+ if (TII.isPredicable(MI))
+ AddDefaultPred(MIB);
+
+ // Do we optionally set a predicate? Preds is size > 0 iff the predicate
+ // defines CPSR. All other OptionalDefines in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// For double width floating point we need to materialize two constants
+// (the high and the low) into integer registers then use a move to get
+// the combined constant into an FP reg.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+ // This checks to see if we can use VFP3 instructions to materialize
+ // a constant, otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addFPImm(CFP));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+ // For now 32-bit only.
+ if (VT != MVT::i32) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+ unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), DestReg)
+ .addImm(CI->getSExtValue()));
+ return DestReg;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
+ else
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+
+ // TODO: No external globals for now.
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
+
+ // TODO: Need more magic for ARM PIC.
+ if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+ ARMCP::CPValue, PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb) {
+ unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type than can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else
+ goto unsupported_gep;
+ } while (!Worklist.empty());
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Addr.Base.Reg = Tmp;
+ return true;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned PredReg = 0;
+
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned BaseReg = createResultReg(RC);
+
+ if (!isThumb)
+ emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BaseReg, Addr.Base.Reg, Addr.Offset,
+ Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ }
+ Addr.Offset = 0;
+ Addr.Base.Reg = BaseReg;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB) {
+ // addrmode5 output depends on the selection dag addressing dividing the
+ // offset by 4 that it then later multiplies. Do this here as well.
+ if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ }
+ AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::f32:
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ case MVT::f64:
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB);
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass);
+ unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg, getKillRegState(true));
+ AddLoadStoreOperands(VT, Addr, MIB);
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ // TODO: Factor this out.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+ MVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // Re-set the flags just in case.
+ unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CmpReg).addImm(0));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ unsigned CondReg;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ CondReg = ARM::CPSR;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero
+ = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(CondReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+ else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ else return 0;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ const Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+ else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ else return 0;
+
+ // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+ unsigned ResultReg = createResultReg(RC);
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SDIV_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SREM_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT VT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ const Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ // Ignore fastcc. Silence compiler warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+ if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ }
+ }
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ EVT DestVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
+ EVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
+// A quick function that will emit a call for a named libcall in F with the
+// vector of passed arguments for the Instruction in I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // For now we're using BLX etc on the assumption that we have v5t ops.
+ if (!Subtarget->hasV5TOps()) return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ const Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc;
+ if(isThumb) {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addExternalSymbol(TLI.getLibcallName(Call));
+ } else {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call)));
+ }
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm or worry about intrinsics yet.
+ if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+ // Only handle global variable Callees that are direct calls.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
+ return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // For now we're using BLX etc on the assumption that we have v5t ops.
+ // TODO: Maybe?
+ if (!Subtarget->hasV5TOps()) return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgRegs.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc;
+ // Explicitly adding the predicate here.
+ if(isThumb) {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addGlobalAddress(GV, 0, 0);
+ } else {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+ }
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectSIToFP(I);
+ case Instruction::FPToSI:
+ return SelectFPToSI(I);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectSDiv(I);
+ case Instruction::SRem:
+ return SelectSRem(I);
+ case Instruction::Call:
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-darwin.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+
+ // Darwin and thumb1 only for now.
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ !DisableARMFastISel)
+ return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
new file mode 100644
index 0000000..3d175e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
@@ -0,0 +1,97 @@
+//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for
+ // branch instructions. (unconditional)
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // uconditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // branch unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The next two are for the movt/movw pair
+ // the 16bit imm field are split into imm{15-12} and imm{11-0}
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // It is possible to create an "immediate" that happens to be pcrel.
+ // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8)
+ // result in different reloc tags than the above two.
+ // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC
+ fixup_arm_movt_hi16_pcrel, // :upper16:
+ fixup_arm_movw_lo16_pcrel, // :lower16:
+ fixup_t2_movt_hi16_pcrel, // :upper16:
+ fixup_t2_movw_lo16_pcrel, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 0000000..f42c6db
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1021 @@
+//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const unsigned *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (MI->getOpcode() == ARM::LDMIA_RET ||
+ MI->getOpcode() == ARM::t2LDMIA_RET ||
+ MI->getOpcode() == ARM::LDMIA_UPD ||
+ MI->getOpcode() == ARM::t2LDMIA_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ // Move past area 1.
+ if (GPRCS1Size > 0) MBBI++;
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For Darwin, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) MBBI++;
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 3.
+ if (DPRCSSize > 0) MBBI++;
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ if (RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It will be better just to find spare register here.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill);
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill);
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP);
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For Darwin, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad, if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(ARM::R4);
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() &&
+ !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (AFI->isThumb2Function()) {
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else
+ FrameReg = RegInfo->getBaseRegister();
+ } else if (AFI->isThumb2Function()) {
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc== 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (StrOpc == ARM::STR_PRE) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+ RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri ||
+ RetOpcode == ARM::TCRETURNriND);
+
+ SmallVector<unsigned, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ Reg = ARM::PC;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ // Fold the return instruction into the LDM.
+ DeleteRet = true;
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet)
+ MI->eraseFromParent();
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register);
+
+ return true;
+}
+
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+/// FIXME: Make generic?
+static unsigned estimateStackSize(MachineFunction &MF) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+ }
+ return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Addressing modes 4 & 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+ // scratch register. Also spill R4 if Thumb2 function has varsized objects,
+ // since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It will be better just to find spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getVarArgsRegSaveSize() > 0)
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP since
+ // FIXME: It will be better just to find spare register here.
+ if (MFI->hasVarSizedObjects())
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ }
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases =
+ RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // use of BL to implement far jump. If it turns out that it's not needed
+ // then the branch fix up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF, this)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
+ // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If stack and double are 8-byte aligned and we are spilling an odd number
+ // of GPRs, spill one extra callee save GPR so we won't have to pad between
+ // the integer and double callee save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill high register if the function is thumb1
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 0000000..1288b70
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,74 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 0000000..3f02383
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,217 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a biggest one can be addressed using offsets
+// from the same base pointer (no need for separate base pointer for each of the
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of 3 arrays should be kept in the registers, thus
+// this code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// Pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// note that we saved 2 registers here almostly "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+namespace {
+ class ARMGlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<const Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ const Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals");
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ if (I->getAlignment() != 0)
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+ const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+ if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
+ // FIXME: This currently breaks the EH processing due to way how the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 0000000..676b01e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,121 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainVFP) {
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+ Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ } else if (Domain == ARMII::DomainNEON) {
+ if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
+ return false;
+ } else
+ return false;
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause 4 cycle stall.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const TargetInstrDesc &LastTID = LastMI->getDesc();
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastTID.isBarrier() &&
+ (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = llvm::prior(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TRI))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = 0;
+ FpMLxStalls = 0;
+ ITBlockSize = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = 0;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 0000000..2bc218d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMSubtarget &STI;
+
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ARMBaseInstrInfo &tii,
+ const ARMBaseRegisterInfo &tri,
+ const ARMSubtarget &sti,
+ const ScheduleDAG *DAG) :
+ ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+ TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 0000000..a506cff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,2882 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMAddressingModes.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(false));
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMBaseTargetMachine &TM;
+ const ARMBaseInstrInfo *TII;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ SDNode *Select(SDNode *N);
+
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C);
+ bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C);
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
+ bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+ unsigned Scale);
+ bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2ShifterOperandReg(SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm);
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm);
+
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return is_so_imm(N->getZExtValue());
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return is_t2_so_imm(N->getZExtValue());
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+ /// ARM.
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *Opcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
+ SDNode *SelectConcatVector(SDNode *N);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
+
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoidded.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
+ if (TID.mayStore())
+ return true;
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isCortexA9())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ // Do not check isShifterOpProfitable. This must return true.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse())
+ // Compute R +/- (R << N) and reuse it.
+ return false;
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return AM2_SHOP;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalize to X + -C, no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
+ Addr = N;
+
+ unsigned Alignment = 0;
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned LSNAlign = LSN->getAlignment();
+ unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+ if (LSNAlign > MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
+ SDValue &Base, SDValue &Offset){
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || !NC->isNullValue())
+ return false;
+
+ Base = Offset = N;
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+ SDValue &Offset, unsigned Scale) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP))
+ return false;
+
+ // FIXME: Why do we explicitly check for a match here and then return false?
+ // Presumably to allow something else to match, but shouldn't this be
+ // documented?
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+ return false;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else {
+ Base = N;
+ }
+
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+ unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+ // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+ if (LHSC != 0 || RHSC != 0) return false;
+
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
+ SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm){
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
+ SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm) {
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R + (R << [1,2,3]) and reuse it.
+ Base = N;
+ return false;
+ }
+
+ // Look for (R + R) or (R + (R << [1,2,3])).
+ unsigned ShAmt = 0;
+ Base = N.getOperand(0);
+ OffReg = N.getOperand(1);
+
+ // Swap if it is ((R << c) + R).
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg);
+ if (ShOpcVal != ARM_AM::lsl) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
+ if (ShOpcVal == ARM_AM::lsl)
+ std::swap(Base, OffReg);
+ }
+
+ if (ShOpcVal == ARM_AM::lsl) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+
+ return true;
+}
+
+//===--------------------------------------------------------------------===//
+
+/// getAL - Returns a ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
+ }
+
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+ break;
+ case MVT::i16:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+ break;
+ case MVT::i8:
+ case MVT::i1:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+ break;
+ default:
+ return NULL;
+ }
+ Match = true;
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 5);
+ }
+
+ return NULL;
+}
+
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+ bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+ EVT AddrTy = MemAddr.getValueType();
+
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
+
+ // Load the odd subregs.
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ }
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
+ if (NumVecs == 1) {
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(SuperReg);
+ Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs, unsigned *Opcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
+ bool isSigned) {
+ if (!Subtarget->hasV6T2Ops())
+ return NULL;
+
+ unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ unsigned Width = CountTrailingOnes_32(And_imm);
+ unsigned LSB = Srl_imm;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ unsigned Width = 32 - Srl_imm;
+ int LSB = Srl_imm - Shl_imm;
+ if (LSB < 0)
+ return NULL;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ Opc = ARM::t2MOVCCi;
+ } else if (TrueImm <= 0xffff) {
+ Opc = ARM::t2MOVCCi16;
+ } else if (is_t2_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::t2MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+ // Large immediate.
+ Opc = ARM::t2MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = is_so_imm(TrueImm);
+ if (isSoImm) {
+ Opc = ARM::MOVCCi;
+ } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+ Opc = ARM::MOVCCi16;
+ } else if (is_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() &&
+ (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+ // Large immediate.
+ Opc = ARM::MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ SDValue CCR = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
+}
+
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+}
+
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->hasThumb2())
+ // Thumb2-aware targets have the MOVT instruction, so all immediates can
+ // be done with MOV + MOVT, at worst.
+ UseCP = 0;
+ else {
+ if (Subtarget->isThumb()) {
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ } else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ }
+
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb1Only()) {
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
+ Ops, 4);
+ } else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 5);
+ }
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb1Only()) {
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ } else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+ break;
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
+ return I;
+ break;
+ case ISD::MUL:
+ if (Subtarget->isThumb1Only())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
+ // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
+ // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
+ // are entirely contributed by c2 and lower 16-bits are entirely contributed
+ // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
+ // Select it to: "movt x, ((c1 & 0xffff) >> 16)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ break;
+ unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ? ARM::t2MOVTi16
+ : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
+ if (!Opc)
+ break;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ break;
+ if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
+ SDValue N2 = N0.getOperand(1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C)
+ break;
+ unsigned N1CVal = N1C->getZExtValue();
+ unsigned N2CVal = N2C->getZExtValue();
+ if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
+ (N1CVal & 0xffffU) == 0xffffU &&
+ (N2CVal & 0xffffU) == 0x0U) {
+ SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
+ MVT::i32);
+ SDValue Ops[] = { N0.getOperand(0), Imm16,
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ }
+ }
+ break;
+ }
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::LOAD: {
+ SDNode *ResNode = 0;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ResNode = SelectT2IndexedLoad(N);
+ else
+ ResNode = SelectARMIndexedLoad(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ?
+ ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InFlag);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(N);
+ case ARMISD::CNEG: {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::f32:
+ Opc = ARM::VNEGScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VNEGDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
+ }
+
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
+
+ case ARMISD::VLD2DUP: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
+ ARM::VLD2DUPd32Pseudo };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+ ARM::VLD2DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+ ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+ ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+ ARM::VLD2q32Pseudo_UPD };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+ ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+ ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+ ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+ ARM::VST2q32Pseudo_UPD };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld2: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld3: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2lane: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ }
+ break;
+ }
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
+ }
+
+ return SelectCode(N);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 0000000..1835ec0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,6984 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(false));
+
+cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
+ EVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+
+ EVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ if (ElemTy != MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction(VT.getSimpleVT(),
+ (MVT::SimpleValueType)InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::AND, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::OR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::DPRRegisterClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::QPRRegisterClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new ARMElfTargetObjectFile();
+}
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
+
+ if (Subtarget->isTargetDarwin()) {
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isAAPCS_ABI()) {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+ }
+
+ if (Subtarget->isThumb1Only())
+ addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ else
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP support any arithmetic operations on it.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ }
+
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ if (!Subtarget->isThumb1Only()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+ }
+
+ // i64 operation support.
+ if (Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ } else {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
+ // v7M has a hardware divider
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now)
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
+ // iff target supports vfp2.
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ // Various VFP goodness
+ if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (Subtarget->hasVFP2()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+ // Special handling for half-precision FP.
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ }
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
+
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::OR);
+ if (Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::AND);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ //// temporary - rewrite interface to use type
+ maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ benefitFromCodePlacementOpt = true;
+}
+
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of it's super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurence of cross class copies and subregister insertions
+// and extractions.
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+ // and vector types. Since there are 32 SPR registers and 32 DPR registers so
+ // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPZ: return "ARMISD::CMPZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CNEG: return "ARMISD::CNEG";
+
+ case ARMISD::RBIT: return "ARMISD::RBIT";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
+
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
+ case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
+ case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
+ case ARMISD::VSHRN: return "ARMISD::VSHRN";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ }
+}
+
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return ARM::QQPRRegisterClass;
+ else if (VT == MVT::v8i64)
+ return ARM::QQQQPRRegisterClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
+ return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
+}
+
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Load are scheduled for latency even if there instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+
+ if (TID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
+ return Sched::Latency;
+
+ return Sched::RegPressure;
+}
+
+// FIXME: Move to RegInfo
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "ARMGenCallingConv.inc"
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
+ // Use target triple & subtarget features to do actual dispatch.
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ }
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 or half of a v2f64.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal())
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const {
+
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ dl, DAG, NextVA,
+ Flags));
+ }
+}
+
+/// LowerCall - Lowering a call into a callseq_start <-
+/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue
+ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Temporarily disable tail calls so things don't break.
+ if (!EnableARMTailCalls)
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ RegsToPassVector RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (!IsSibCall) {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceeding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag =SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (EnableARMLongCalls) {
+ assert (getTargetMachine().getRelocationModel() == Reloc::Static
+ && "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them.
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instructions,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, getTargetMachine(),
+ RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, getTargetMachine(),
+ RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(1), Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+ // Guarantee that all emitted copies are
+ // stuck together, avoiding something bad.
+ Flag = Chain.getValue(1);
+ }
+
+ SDValue result;
+ if (Flag.getNode())
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else // Return Void
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+ return result;
+}
+
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ unsigned NumCopies = 0;
+ SDNode* Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
+
+ bool HasRet = false;
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+ }
+
+ return HasRet;
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress,
+ PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = GA->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (GV->isDeclaration()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ // Initial exec model.
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+ // otherwise use the "Local Exec" TLS Model
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ if (RelocM == Reloc::Static)
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+ unsigned Wrapper = (RelocM == Reloc::PIC_)
+ ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ unsigned ARMPCLabelIndex = 0;
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ } else {
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+ const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
+ }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue Op5 = Op.getOperand(5);
+ bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+ unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
+ ARM_MB::MemBOpt DMBOpt;
+ if (isDeviceBarrier)
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+ else
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // ARM pre v5TE and Thumb1 does not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ if (Subtarget->isThumb())
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ unsigned isData = Subtarget->isThumb() ? 0 : 1;
+
+ // Currently there is no intrinsic that matches pli.
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+ DAG.getConstant(isData, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ SDValue ArgValue;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+
+ } else {
+ TargetRegisterClass *RC;
+
+ if (RegVT == MVT::f32)
+ RC = ARM::SPRRegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = ARM::DPRRegisterClass;
+ else if (RegVT == MVT::v2f64)
+ RC = ARM::QPRRegisterClass;
+ else if (RegVT == MVT::i32)
+ RC = (AFI->isThumb1OnlyFunction() ?
+ ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ // varargs
+ if (isVarArg) {
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ unsigned NumGPRs = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned VARegSize = (4 - NumGPRs) * 4;
+ unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; NumGPRs < 4; ++NumGPRs) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+ }
+
+ return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ // Uses only Z Flag
+ CompareType = ARMISD::CMPZ;
+ break;
+ }
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Cond.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = Cond.getOperand(4);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMcc, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+ // If one of the operand is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ // If unsafe fp math optimization is enabled and there are no othter uses of
+ // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI, UId);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(),
+ false, false, 0);
+ Chain = Addr.getValue(1);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ } else {
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), false, false, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ }
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid opcode!");
+ case ISD::FP_TO_SINT:
+ Opc = ARMISD::FTOSI;
+ break;
+ case ISD::FP_TO_UINT:
+ Opc = ARMISD::FTOUI;
+ break;
+ }
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ Opc = ARMISD::SITOF;
+ break;
+ case ISD::UINT_TO_FP:
+ Opc = ARMISD::UITOF;
+ break;
+ }
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
+ bool F2IisFast = Subtarget->isCortexA9() ||
+ Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // If float to int conversion isn't going to be super expensive, then simply
+ // or in the signbit.
+ if (F2IisFast) {
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ }
+
+ // Remove the signbit of operand 0.
+ Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
+
+ // If operand 1 signbit is one, then negate operand 0.
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
+ ISD::SETLT, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// ExpandBITCAST - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBITCAST called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
+ }
+
+ // Turn f64->i64 into VMOVRRD.
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
+
+ return SDValue();
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+ // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+ // so that the shift + and get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
+ SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Lower vector shifts on NEON to use VSHL.
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb1Only()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue TmpOp0, TmpOp1;
+ bool Invert = false;
+ bool Swap = false;
+ unsigned Opc = 0;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal FP comparison"); break;
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETOLT:
+ case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETOLE:
+ case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OLT | OGT).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ break;
+ case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OLT | OGE).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ break;
+ }
+ } else {
+ // Integer comparisons.
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal integer comparison"); break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETLE: Swap = true;
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ }
+
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+ if (Opc == ARMISD::VCEQ) {
+
+ SDValue AndOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ AndOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+ AndOp = Op1;
+
+ // Ignore bitconvert.
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+ AndOp = AndOp.getOperand(0);
+
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+ Opc = ARMISD::VTST;
+ Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Invert = !Invert;
+ }
+ }
+ }
+
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return the encoded value.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ EVT &VT, bool is128Bits, NEONModImmType type) {
+ unsigned OpCmode, Imm;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions others than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ switch (SplatBitSize) {
+ case 8:
+ if (type != VMOVModImm)
+ return SDValue();
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
+ assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ break;
+
+ case 16:
+ // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
+
+ case 32:
+ // NEON's 32-bit VMOV supports splat values where:
+ // * only one byte is nonzero, or
+ // * the least significant byte is 0xff and the second byte is nonzero, or
+ // * the least significant 2 bytes are 0xff and the third is nonzero.
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
+
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
+ if ((SplatBits & ~0xffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
+
+ if ((SplatBits & ~0xffffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
+
+ // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+ // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+ // VMOV.I32. A (very) minor optimization would be to replicate the value
+ // and fall through here to test for a valid 64-bit splat. But, then the
+ // caller would also need to check and handle the change in size.
+ return SDValue();
+
+ case 64: {
+ if (type != VMOVModImm)
+ return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ uint64_t BitMask = 0xff;
+ uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
+ for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+ Val |= BitMask;
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
+ return SDValue();
+ }
+ BitMask <<= 8;
+ ImmMask <<= 1;
+ }
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+ break;
+ }
+
+ default:
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
+ }
+
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+}
+
+static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
+static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ // Check if an immediate VMOV works.
+ EVT VmovVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+ ((1LL << SplatBitSize) - 1));
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+ }
+ }
+
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ return SDValue();
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ }
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently only do something sane when at most two source vectors
+ // involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = {0, 0};
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // It probably isn't worth padding out a smaller vector just to
+ // break it down again in a shuffle.
+ return SDValue();
+ }
+
+ // Since only 64-bit and 128-bit vectors are legal on ARM and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // Span too large for a VEXT to cope
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+ .getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ }
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm, WhichResult;
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
+}
+
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SmallVector<int, 8> ShuffleMask;
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+ // of the same time so that they get CSEd properly.
+ SVN->getMask(ShuffleMask);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
+ }
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ int64_t SExtVal = C->getSExtValue();
+ if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ return false;
+ } else {
+ if ((C->getZExtValue() >> HalfSize) != 0)
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
+ NewOpc = ARMISD::VMULLs;
+ else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
+ NewOpc = ARMISD::VMULLu;
+ else if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op1 = SkipExtension(N1, DAG);
+
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single newton step. This requires that we use a weird bias
+ // of 89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 sdiv ... Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ break;
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
+ break;
+ case ISD::SRL:
+ case ISD::SRA:
+ Res = Expand64BitShift(N, DAG, Subtarget);
+ break;
+ }
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ unsigned scratch = BB->getParent()->getRegInfo()
+ .createVirtualRegister(ARM::GPRRegisterClass);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+ unsigned scratch2 = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ switch (MI->getOpcode()) {
+ default:
+ MI->dump();
+ llvm_unreachable("Unexpected instr type to insert");
+
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+ case ARM::tMOVCCr_pseudo: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+ .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ EVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Attempt to use immediate-form VBIC
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
+ return SDValue();
+ unsigned Mask = MaskC->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return SDValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask2 = N11C->getZExtValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ ARM::isBitFieldInvertedMask(~Mask2) &&
+ (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned lsb = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ ARM::isBitFieldInvertedMask(Mask2) &&
+ (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ }
+
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
+/// C1 & C2 == C1.
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & Mask2) == Mask2)
+ return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ }
+ return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+ return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue StVal = St->getValue();
+ if (!ISD::isNormalStore(St) || St->isVolatile() ||
+ StVal.getValueType() != MVT::i64 ||
+ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = StVal.getDebugLoc();
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = N->getDebugLoc();
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI){
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ DebugLoc dl = N->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 load inserted into a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: assert(0 && "unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: assert(0 && "unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ Ops, 2, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op = N->getOperand(0);
+
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ EVT VT = N->getValueType(0);
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+}
+
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value count must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (isIntrinsic)
+ Cnt = -Cnt;
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ // Vector shifts: check for immediate versions and lower them.
+ // Note: This is done during DAG combining instead of DAG legalizing because
+ // the build_vectors for 64-bit vector element shift counts are generally
+ // not legal, and it is hard to see their values after they get legalized to
+ // loads from a constant pool.
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ case Intrinsic::arm_neon_vqshiftsu:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+ VShiftOpc = ARMISD::VSHL;
+ break;
+ }
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ break;
+ }
+ return SDValue();
+
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vshll intrinsic");
+
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshiftsu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ // Narrowing shifts require an immediate right shift.
+ if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
+
+ default:
+ llvm_unreachable("unhandled vector shift");
+ }
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ // Opcode already set above.
+ break;
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (Cnt == VT.getVectorElementType().getSizeInBits())
+ VShiftOpc = ARMISD::VSHLLi;
+ else
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
+ ARMISD::VSHLLs : ARMISD::VSHLLu);
+ break;
+ case Intrinsic::arm_neon_vshiftn:
+ VShiftOpc = ARMISD::VSHRN; break;
+ case Intrinsic::arm_neon_vrshifts:
+ VShiftOpc = ARMISD::VRSHRs; break;
+ case Intrinsic::arm_neon_vrshiftu:
+ VShiftOpc = ARMISD::VRSHRu; break;
+ case Intrinsic::arm_neon_vrshiftn:
+ VShiftOpc = ARMISD::VRSHRN; break;
+ case Intrinsic::arm_neon_vqshifts:
+ VShiftOpc = ARMISD::VQSHLs; break;
+ case Intrinsic::arm_neon_vqshiftu:
+ VShiftOpc = ARMISD::VQSHLu; break;
+ case Intrinsic::arm_neon_vqshiftsu:
+ VShiftOpc = ARMISD::VQSHLsu; break;
+ case Intrinsic::arm_neon_vqshiftns:
+ VShiftOpc = ARMISD::VQSHRNs; break;
+ case Intrinsic::arm_neon_vqshiftnu:
+ VShiftOpc = ARMISD::VQSHRNu; break;
+ case Intrinsic::arm_neon_vqshiftnsu:
+ VShiftOpc = ARMISD::VQSHRNsu; break;
+ case Intrinsic::arm_neon_vqrshiftns:
+ VShiftOpc = ARMISD::VQRSHRNs; break;
+ case Intrinsic::arm_neon_vqrshiftnu:
+ VShiftOpc = ARMISD::VQRSHRNu; break;
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ VShiftOpc = ARMISD::VQRSHRNsu; break;
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vshiftins: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+ VShiftOpc = ARMISD::VSLI;
+ else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+ VShiftOpc = ARMISD::VSRI;
+ else {
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vqrshifts:
+ case Intrinsic::arm_neon_vqrshiftu:
+ // No immediate versions of these to check for.
+ break;
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them. As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+ }
+ return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue N0 = N->getOperand(0);
+
+ // Check for sign- and zero-extensions of vector extract operations of 8-
+ // and 16-bit vector elements. NEON supports these directly. They are
+ // handled during DAG combining because type legalization will promote them
+ // to 32-bit types and it is messy to recognize the operations after that.
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Lane = N0.getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (VT == MVT::i32 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ISD::SIGN_EXTEND:
+ Opc = ARMISD::VGETLANEs;
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Opc = ARMISD::VGETLANEu;
+ break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
+ }
+ return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ if (!Subtarget->allowsUnalignedMem())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return true;
+ // FIXME: VLD1 etc with standard alignment is legal.
+ }
+}
+
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
+
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+
+ // ARM mode.
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ EVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb1Only())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
+ return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l':
+ if (Subtarget->isThumb())
+ return std::make_pair(0U, ARM::tGPRRegisterClass);
+ else
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'r':
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, ARM::SPRRegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, ARM::DPRRegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, ARM::QPRRegisterClass);
+ break;
+ }
+ }
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) { // GCC ARM Constraint Letters
+ default: break;
+ case 'l':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ 0);
+ case 'r':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::LR, 0);
+ case 'w':
+ if (VT == MVT::f32)
+ return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12,ARM::S13,ARM::S14,ARM::S15,
+ ARM::S16,ARM::S17,ARM::S18,ARM::S19,
+ ARM::S20,ARM::S21,ARM::S22,ARM::S23,
+ ARM::S24,ARM::S25,ARM::S26,ARM::S27,
+ ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
+ if (VT.getSizeInBits() == 64)
+ return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10,ARM::D11,
+ ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+ if (VT.getSizeInBits() == 128)
+ return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
+ break;
+ }
+
+ return std::vector<unsigned>();
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ switch (Constraint) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (Constraint) {
+ case 'I':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb1Only()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal < 7)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
+
+int ARM::getVFPf32Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+int ARM::getVFPf64Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffLL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return 0;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM::getVFPf32Imm(Imm) != -1;
+ if (VT == MVT::f64)
+ return ARM::getVFPf64Imm(Imm) != -1;
+ return false;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000..dc400c4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,488 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in
+ // DYN mode.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPZ, // ARM compare that sets only Z flag.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+ CNEG, // ARM conditional negate instructions.
+
+ BCC_i64,
+
+ RBIT, // ARM bitreverse instruction
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
+
+ TC_RETURN, // Tail call return pseudo.
+
+ THREAD_POINTER,
+
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
+ MEMBARRIER, // Memory barrier (DMB)
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
+
+ VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
+ VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
+ VCGEU, // Vector compare unsigned greater than or equal.
+ VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
+ VCGTU, // Vector compare unsigned greater than.
+ VTST, // Vector test bits.
+
+ // Vector shift by immediate:
+ VSHL, // ...left
+ VSHRs, // ...right (signed)
+ VSHRu, // ...right (unsigned)
+ VSHLLs, // ...left long (signed)
+ VSHLLu, // ...left long (unsigned)
+ VSHLLi, // ...left long (with maximum shift count)
+ VSHRN, // ...right narrow
+
+ // Vector rounding shift by immediate:
+ VRSHRs, // ...right (signed)
+ VRSHRu, // ...right (unsigned)
+ VRSHRN, // ...right narrow
+
+ // Vector saturating shift by immediate:
+ VQSHLs, // ...left (signed)
+ VQSHLu, // ...left (unsigned)
+ VQSHLsu, // ...left (signed to unsigned)
+ VQSHRNs, // ...right narrow (signed)
+ VQSHRNu, // ...right narrow (unsigned)
+ VQSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector saturating rounding shift by immediate:
+ VQRSHRNs, // ...right narrow (signed)
+ VQRSHRNu, // ...right narrow (unsigned)
+ VQRSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector shift and insert:
+ VSLI, // ...left
+ VSRI, // ...right
+
+ // Vector get lane (VMOV scalar to ARM core register)
+ // (These are used for 8- and 16-bit element types only.)
+ VGETLANEu, // zero-extend vector extract element
+ VGETLANEs, // sign-extend vector extract element
+
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN, // transpose
+
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN,
+
+ // Bit-field insert
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace ARM {
+ /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
+ /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
+ /// instruction, returns its 8-bit integer representation. Otherwise,
+ /// returns -1.
+ int getVFPf32Imm(const APFloat &FPImm);
+ int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding(void) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses. of the specified type.
+ /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ const ARMSubtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ const TargetRegisterInfo *RegInfo;
+
+ const InstrItineraryData *Itins;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
+ void addDRTypeForNEON(EVT VT);
+ void addQRTypeForNEON(EVT VT);
+
+ typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+ void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+
+ };
+
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 0000000..765cba4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,1793 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def LdStExFrm : Format<11>;
+
+def ArithMiscFrm : Format<12>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
+
+// Misc flags.
+
+// The instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have a Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+ bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>; // Unset.
+def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
+def Size8Bytes : SizeFlagVal<2>;
+def Size4Bytes : SizeFlagVal<3>;
+def Size2Bytes : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
+def CCOutOperand : AsmOperandClass {
+ let Name = "CCOut";
+ let SuperClasses = [];
+}
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemBarrierOptOperand";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseProcIFlagsOperand";
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMSRMaskOperand";
+}
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+}
+
+// ARM special operands for disassembly only.
+//
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+}
+
+def cps_opt : Operand<i32> {
+ let PrintMethod = "printCPSOptionOperand";
+}
+
+def msr_mask : Operand<i32> {
+ let PrintMethod = "printMSRMaskOperand";
+ let ParserMatchClass = MSRMaskOperand;
+}
+
+// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
+// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
+def neg_zero : Operand<i32> {
+ let PrintMethod = "printNegZeroOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : Instruction {
+ let Namespace = "ARM";
+
+ AddrMode AM = am;
+ SizeFlagVal SZ = sz;
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+ Format F = f;
+ bits<6> Form = F.Value;
+ Domain D = d;
+ bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{7-5} = SZ.Value;
+ let TSFlags{9-8} = IndexModeBits;
+ let TSFlags{15-10} = Form;
+ let TSFlags{16} = isUnaryDataProc;
+ let TSFlags{17} = canXformTo16Bit;
+ let TSFlags{19-18} = D.Value;
+
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+class Encoding {
+ field bits<32> Inst;
+}
+
+class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits later
+// on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>;
+
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ // FIXME: This really should derive from InstTemplate instead, as pseudos
+ // don't need encoding information. TableGen doesn't like that
+ // currently. Need to figure out why and fix it.
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
+ "", itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let SZ = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let SZ = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let SZ = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
+
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b11111001;
+ let Inst{3-0} = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM3 store w/ two operands: (GPR, am3offset)
+ bits<14> offset;
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-25} = 0b000;
+ let Inst{23} = offset{8};
+ let Inst{22} = offset{9};
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+
+// Pre-indexed stores
+class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+
+// Post-indexed stores
+class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0; // S bit
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiple w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
+ let Inst{27-20} = opcod;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<8> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst",
+ pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>,
+ Encoding {
+ let Inst{31-27} = opcod1;
+ let Inst{15-14} = opcod2;
+ let Inst{12} = opcod3;
+}
+
+// Move to/from coprocessor instructions
+class T1Cop<dag oops, dag iops, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>,
+ Encoding, Requires<[IsThumb, HasV6]> {
+ let Inst{31-28} = 0b1110;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+ asm, cstr, pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
+
+class Encoding16 : Encoding {
+ let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+ let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+ let Inst{15-14} = 0b00;
+ let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010000;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+ let Inst{15-12} = opA;
+ let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+ let Inst{15-12} = 0b1011;
+ let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "",
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = addr{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
+ let Inst{31-28} = 0b1111;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, cstr, pattern>;
+
+// T2Iidxldst - Thumb2 indexed load / store instructions.
+class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
+
+ bits<9> addr;
+ let Inst{7-0} = addr{7-0};
+ let Inst{9} = addr{8}; // Sign bit
+
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{19-16} = Rn{3-0};
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Load / store multiple
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+}
+
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// Double precision, unary
+class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Double precision, binary
+class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision, unary
+class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
+class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+// Single precision, binary
+class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
+class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = opcod4;
+ let Inst{6} = 1;
+ let Inst{4} = 0;
+}
+
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+}
+
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM NEON Instruction templates.
+//
+
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+}
+
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+}
+
+// NEON "one register and a modified immediate" format.
+class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
+ bit op5, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{23} = op23;
+ let Inst{21-19} = op21_19;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
+}
+
+// NEON 2 vector register format.
+class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON 2 vector register with immediate.
+class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
+}
+
+// NEON 3 vector register format.
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON VMOVs between scalar and core registers.
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
+ "", itin> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
+}
+class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
+ opc, dt, asm, pattern>;
+
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<4> lane;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 0000000..6f48d96
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,61 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDRi12;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRBi12;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STRi12;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRBi12;
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 0000000..f2c7bdc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,44 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+ ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 0000000..c827ce3d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,3909 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
+def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
+ [SDNPInGlue]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
+
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+ SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
+
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
+def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
+def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
+def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all low 16-bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
+
+/// imm0_65535 predicate - True if the 32-bit immediate is in the range
+/// [0.65535].
+def imm0_65535 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 65536;
+}]>;
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+/// adde and sube predicates - True based on whether the carry flag output
+/// will be needed or not.
+def adde_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def sube_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def adde_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+def sube_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+
+// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let SuperClasses = [];
+}
+
+def DPRRegListAsmOperand : AsmOperandClass {
+ let Name = "DPRRegList";
+ let SuperClasses = [];
+}
+
+def SPRRegListAsmOperand : AsmOperandClass {
+ let Name = "SPRRegList";
+ let SuperClasses = [];
+}
+
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }]> {
+ let EncoderMethod = "getRotImmOpValue";
+}
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+def shift_so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits.
+def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+ let EncoderMethod = "getSOImmOpValue";
+ let PrintMethod = "printSOImmOperand";
+}
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->hasV6T2Ops())
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
+ return (int32_t)N->getZExtValue() < 32;
+}]>;
+
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
+ return (int32_t)N->getZExtValue() < 32;
+}]> {
+ let EncoderMethod = "getImmMinusOneOpValue";
+}
+
+// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
+// The imm is split into imm{15-12}, imm{11-0}
+//
+def i32imm_hilo16 : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+}
+
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
+
+/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
+def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{
+ return isInt<5>(N->getSExtValue());
+}]>;
+
+/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
+def width_imm : Operand<i32>, PatLeaf<(imm), [{
+ return N->getSExtValue() > 0 && N->getSExtValue() <= 32;
+}] > {
+ let EncoderMethod = "getMsbOpValue";
+}
+
+// Define ARM specific addressing modes.
+
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
+//
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
+ let PrintMethod = "printAddrMode3Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ldstm_mode := {ia, ib, da, db}
+//
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+def MemMode5AsmOperand : AsmOperandClass {
+ let Name = "MemMode5";
+ let SuperClasses = [];
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+ let ParserMatchClass = MemMode5AsmOperand;
+ let EncoderMethod = "getAddrMode5OpValue";
+}
+
+// addrmode6 := reg with optional alignment
+//
+def addrmode6 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+}
+
+def am6offset : Operand<i32> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
+}
+
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocRegOperand";
+}
+
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+ }
+}
+
+/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+ }
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = shift;
+ }
+}
+}
+
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{3-0} = Rm;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
+ }
+}
+
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = 0b00;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
+ }
+}
+
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode GPR:$Rn,
+ (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+}
+
+// For disassembly only.
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+ }
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+let Uses = [CPSR] in {
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+}
+// Carry setting variants
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ let Inst{20} = 1;
+ let Inst{25} = 1;
+ }
+ def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{20} = 1;
+ let Inst{25} = 0;
+ }
+ def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{20} = 1;
+ let Inst{25} = 0;
+ }
+}
+}
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000001;
+}
+
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000010;
+}
+
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000011;
+}
+
+def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
+ "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+}
+
+def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000100;
+}
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010010;
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16} = 0;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+
+// Preload signals the memory system of possible future data/instruction access.
+// These are for disassembly only.
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
+
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ bits<17> shift;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
+
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
+}
+
+def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
+}
+
+// A5.4 Permanently UNDEFINED instructions.
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM]> {
+ let Inst = 0xe7ffdefe;
+}
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ Size4Bytes, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
+
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+ bits<4> Rd;
+ bits<12> label;
+ let Inst{27-25} = 0b001;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label;
+}
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ Size4Bytes, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Size4Bytes, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]> {
+ let Inst{27-0} = 0b0001001011111111111100011110;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{27-0} = 0b0001101000001111000000001110;
+ }
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+
+ // ARMV4 only
+ // FIXME: We would really like to define this as a vanilla ARMPat like:
+ // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)>
+ // With that, however, we can't set isBranch, isTerminator, etc..
+ def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst),
+ Size4Bytes, IIC_Br, [(brind GPR:$dst)]>,
+ Requires<[IsARM, NoV4T]>;
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsNotDarwin]>;
+
+ // ARMv4
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ // ARMv5T and above
+ def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsDarwin]>;
+
+ // ARMv4
+ def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsDarwin]>;
+}
+
+// Tail calls.
+
+// FIXME: These should probably be xformed into the non-TC versions of the
+// instructions as part of MC lowering.
+// FIXME: These seem to be used for both Thumb and ARM instruction selection.
+// Thumb should have its own version since the instruction is actually
+// different, even though the mnemonic is the same.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsARM, IsDarwin]>;
+
+ def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsThumb, IsDarwin]>;
+
+ def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsThumb, IsNotDarwin]>;
+
+ def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsNotDarwin]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+ }
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // B is "predicable" since it can be xformed into a Bcc.
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
+ "b\t$target", [(br bb:$target)]> {
+ bits<24> target;
+ let Inst{31-28} = 0b1110;
+ let Inst{23-0} = target;
+ }
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ }
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0010;
+ //let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
+}
+
+// Supervisor Call (Software Interrupt) -- for disassembly only
+let isCall = 1, Uses = [SP] in {
+def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
+}
+
+// Store Return State is a system instruction -- for disassembly only
+let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b110; // W = 1
+}
+
+def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b100; // W = 0
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b011; // W = 1
+}
+
+def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b001; // W = 0
+}
+} // isCodeGenOnly = 1
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
+
+// Loads with zero extension
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
+ isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring?
+// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1)
+// how to represent that such that tblgen is happy and we don't
+// mark this codegen only?
+// Load doubleword
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+ (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
+ []>, Requires<[IsARM, HasV5TE]>;
+}
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ }
+ def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am2offset:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ }
+ def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), IndexModeNone,
+ LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), IndexModeNone,
+ LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+}
+
+// Store
+
+// Stores with truncate
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1,
+ isCodeGenOnly = 1 in // $src2 doesn't exist in asm string
+def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r,
+ "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+
+// Indexed stores
+def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+
+def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePre, StMiscFrm, IIC_iStore_ru,
+ "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+
+def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ GPR:$Rn, am3offset:$offset))]>;
+
+// For disassembly only
+def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$src1, $src2, [$base, $offset]!",
+ "$base = $base_wb", []>;
+
+// For disassembly only
+def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$src1, $src2, [$base], $offset",
+ "$base = $base_wb", []>;
+
+// STRT, STRBT, and STRHT are for disassembly only.
+
+def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn,am2offset:$offset),
+ IndexModeNone, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
+def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModeNone, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
+def STRHT: AI3sthpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strht", "\t$src, [$base], $offset", "$base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// Load / Store Multiple Mnemonic Aliases
+def : MnemonicAlias<"ldm", "ldmia">;
+def : MnemonicAlias<"stm", "stmia">;
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+// FIXME: Should be a pseudo-instruction.
+def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
+ "ldmia${p}\t$Rn!, $regs",
+ "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = 1; // Load
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
+ DPSoRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = src;
+ let Inst{25} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
+ Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set GPR:$Rd,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
+def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
+ Requires<[IsARM, HasV6T2]>;
+
+let Uses = [CPSR] in
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm SXTAB : AI_exta_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_exta_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// For disassembly only
+defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+
+// For disassembly only
+defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
+def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
+// For disassembly only
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+
+
+def SBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set.
+defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+
+// ADC and SUBC with 's' bit set.
+defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+// RSB with 's' bit set.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+}
+
+let Uses = [CPSR] in {
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+}
+
+// FIXME: Allow these to be predicated.
+let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in {
+def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+}
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
+
+// ARM Arithmetic Instruction -- for disassembly only
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = op27_20;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Signed/Unsigned saturate -- for disassembly only
+
+def SSAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{3-0} = Rn;
+}
+
+def SSAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101010;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110111;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101110;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+// GNU as only supports this form of bfi (w/ 4 arguments)
+let isAsmParserOnly = 1 in
+def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
+ lsb_pos_imm:$lsb, width_imm:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
+ []>, Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{20-16} = width; // Custom encoder => lsb+width-1
+ let Inst{3-0} = Rn;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+ IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMUL32,
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, NoV6]>;
+
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, NoV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Extra precision multiplies with low / high results
+
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMUL64, []>,
+ Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMUL64, []>,
+ Requires<[IsARM, NoV6]>;
+}
+
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+}
+
+// Multiply + accumulate
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC64, []>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC64, []>,
+ Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC64, []>,
+ Requires<[IsARM, NoV6]>;
+
+}
+
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdLo;
+ let Inst{15-12} = RdHi;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+} // neverHasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld -- for disassembly only
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 1;
+ let Inst{5} = swap;
+ let Inst{6} = sub;
+ let Inst{7} = 0;
+ let Inst{21-20} = 0b00;
+ let Inst{22} = long;
+ let Inst{27-23} = 0b01110;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd,
+ (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]>;
+
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd,
+ (sext_inreg
+ (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+ (shl GPR:$Rm, (i32 8))), i16))]>,
+ Requires<[IsARM, HasV6]>;
+
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+ (and (shl GPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+ (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+ (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def asr_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+ (and (sra GPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+ (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+ (CMPrs GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction and comparison
+// with 0. One would expect these two pieces of code should give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short, the
+// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
+// value of r0 and the carry bit (because the "carry bit" parameter to
+// AddWithCarry is defined as 1 in this case, the carry flag will always be set
+// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
+// never a "carry" when this AddWithCarry is performed (because the "carry bit"
+// parameter to AddWithCarry is defined as 0).
+//
+// When x is 0 and unsigned:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
+//defm CMN : AI1_cmp_irs<0b1011, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+// (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNzri GPR:$src, so_imm_neg:$imm)>;
+
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+// FIXME: These should all be pseudo-instructions that get expanded to
+// the normal MOV instructions. That would fix the dependency on
+// special casing them in tblgen.
+let neverHasSideEffects = 1 in {
+def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
+ IIC_iCMOVr, "mov", "\t$Rd, $Rm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{20} = 0;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+}
+
+def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
+ (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr,
+ "mov", "\t$Rd, $shift",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+}
+
+let isMoveImm = 1 in
+def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 0;
+ let Inst{19-16} = imm{15-12};
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+}
+
+let isMoveImm = 1 in
+def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+ "mov", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+ "mvn", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+}
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff05;
+ let Inst{3-0} = opt;
+}
+
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
+ "mcr", "\tp15, 0, $zero, c7, c10, 5",
+ [(ARMMemBarrierMCR GPR:$zero)]>,
+ Requires<[IsARM, HasV6]> {
+ // FIXME: add encoding
+}
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option -- for disassembly only
+def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-4} = 0xf57ff06;
+ let Inst{3-0} = 0b1111;
+}
+
+let usesCustomInserter = 1 in {
+ let Uses = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+ "ldrexb", "\t$Rt, [$Rn]",
+ []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+ "ldrexh", "\t$Rt, [$Rn]",
+ []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+ "ldrex", "\t$Rt, [$Rn]",
+ []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn),
+ NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, [$Rn]",
+ []>;
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn),
+ NoItinerary,
+ "strexb", "\t$Rd, $src, [$Rn]",
+ []>;
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
+ NoItinerary,
+ "strexh", "\t$Rd, $Rt, [$Rn]",
+ []>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
+ NoItinerary,
+ "strex", "\t$Rd, $Rt, [$Rn]",
+ []>;
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn),
+ NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]",
+ []>;
+}
+
+// Clear-Exclusive is for disassembly only.
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
+let mayLoad = 1 in {
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
+ [/* For disassembly only; pattern left blank */]>;
+def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
+ [/* For disassembly only; pattern left blank */]>;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everthing out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
+ D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+ [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+ Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece so_imms or movw + movt.
+// This is a single pseudo instruction, the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+ Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions. For disassembly only.
+//
+
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary,
+ opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-25} = 0b110;
+}
+
+multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
+
+ def _OFFSET : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _PRE : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "\tp$cop, cr$CRd, $addr!"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _POST : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
+ opc, "\tp$cop, cr$CRd, [$base], $offset"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _OPTION : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option),
+ opc, "\tp$cop, cr$CRd, [$base], $option"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def L_OFFSET : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_PRE : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_POST : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_OPTION : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option),
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+}
+
+defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">;
+defm LDC2 : LdStCop<0b1111, 1, "ldc2">;
+defm STC : LdStCop<{?,?,?,?}, 0, "stc">;
+defm STC2 : LdStCop<0b1111, 0, "stc2">;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class MovRCopro<string opc, bit direction>
+ : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRCopro2<string opc, bit direction>
+ : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro<string opc, bit direction>
+ : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
+}
+
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
+ "msr", "\t$mask, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<12> a;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = a;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
new file mode 100644
index 0000000..1e2e550
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -0,0 +1,4838 @@
+//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+
+// Types for vector shift by immediates. The "SHX" version is for long and
+// narrow operations where the source and destination vectors have different
+// types. The "SHINS" version is for shift and insert operations.
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
+def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
+def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
+def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
+def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
+def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
+def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+
+def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
+def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
+def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+
+def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
+def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
+def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
+def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
+def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
+def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+
+def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
+def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
+def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+
+def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
+def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON operand definitions
+//===----------------------------------------------------------------------===//
+
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+def VLDMQDB
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+def VSTMQDB
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1,
+ "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
+
+// ...with address register writeback:
+class VLD1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
+ "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
+def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
+def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
+def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
+
+def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
+def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
+def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
+def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VLD1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+
+def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
+def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
+def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
+def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
+ []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+
+def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
+def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD2,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD2x2,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
+def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
+def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
+def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
+def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
+// ...with address register writeback:
+class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
+// VLD1LN : Vector Load (single element to one lane)
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []>;
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
+ []>;
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+ [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+ let Pattern = [(set QPR:$dst,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32Pseudo addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+ IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+ "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8 : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
+
+def VST1q8 : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+ "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VST1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
+
+def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VST1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VST1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+
+def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VST1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+ "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+
+def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+ IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VST2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
+def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
+def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8 : VST2Q<{0,0,?,?}, "8">;
+def VST2q16 : VST2Q<{0,1,?,?}, "16">;
+def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+class VST2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+ "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+ let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []>;
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let isCommutable = 0;
+}
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3V<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3V<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
+// Neon Long 3-argument intrinsic. The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+ Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
+
+// Shift by immediate,
+// both double- and quad-register.
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons.
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+ v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+ v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, Dt, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp, Commutable>;
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp, Commutable>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp, Commutable>;
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp, Commutable>;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp, Commutable>;
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp, Commutable>;
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp, Commutable>;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
+ IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
+
+// Neon Wide 3-register vector intrinsics,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+
+// Neon Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
+}
+
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
+}
+
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
+// Neon 3-argument intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+}
+
+
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
+// Neon Long 3-argument intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
+}
+
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
+
+// Neon Pairwise long 2-register intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon Pairwise long 2-register accumulate intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon 2-register vector shift by immediate,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode, Format f> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Accumulate vector operations,
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+
+// Neon Shift-Insert vector operations,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, SDNode ShOp,
+ Format f> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
+ f, OpcodeStr, "8", v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
+ f, OpcodeStr, "16", v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
+ f, OpcodeStr, "32", v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
+ f, OpcodeStr, "64", v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
+ f, OpcodeStr, "8", v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
+ f, OpcodeStr, "16", v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
+ f, OpcodeStr, "32", v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
+ f, OpcodeStr, "64", v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift Long operations,
+// element sizes of 8, 16, 32 bits:
+multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+// Neon Shift Narrow operations,
+// element sizes of 16, 32, 64 bits:
+multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+//===----------------------------------------------------------------------===//
+
+// Vector Add Operations.
+
+// VADD : Vector Add (integer and floating-point)
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
+// VADDL : Vector Add Long (Q = D + D)
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
+// VADDW : Vector Add Wide (Q = Q + D)
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+// VHADD : Vector Halving Add
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
+// VRHADD : Vector Rounding Halving Add
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+// VQADD : Vector Saturating Add
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
+// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
+// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
+
+// Vector Multiply Operations.
+
+// VMUL : Vector Multiply (integer, polynomial and floating-point)
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+
+// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
+
+// Vector Multiply-Accumulate and Multiply-Subtract Operations.
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
+
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
+
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturing Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
+
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
+
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$Vd, $Vm, #0", NEONvclez>;
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
+
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$Vd, $Vm, #0", NEONvcltz>;
+
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+// VTST : Vector Test Bits
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+
+// Vector Bitwise Operations.
+
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+
+
+// VAND : Vector Bitwise AND
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
+
+// VEOR : Vector Bitwise Exclusive OR
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
+
+// VORR : Vector Bitwise OR
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
+
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
+// VBIC : Vector Bitwise Bit Clear (AND NOT)
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+// VORN : Vector Bitwise OR NOT
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+}
+
+// VMVN : Vector Bitwise NOT
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (or (and DPR:$Vn, DPR:$src1),
+ (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (or (and QPR:$Vn, QPR:$src1),
+ (and QPR:$Vm, (vnotq QPR:$src1)))))]>;
+
+// VBIF : Vector Bitwise Insert if False
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT : Vector Bitwise Insert if True
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
+// for equivalent operations with different register constraints; it just
+// inserts copies.
+
+// Vector Absolute Differences.
+
+// VABD : Vector Absolute Difference
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "s", int_arm_neon_vabds, 1>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 1>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+
+// VABDL : Vector Absolute Difference Long (Q = | D - D |)
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+
+// VABA : Vector Absolute Difference and Accumulate
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
+
+// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
+
+// Vector Maximum and Minimum.
+
+// VMAX : Vector Maximum
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+
+// VMIN : Vector Minimum
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
+
+// Vector Pairwise Operations.
+
+// VPADD : Vector Pairwise Add
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
+
+// VPADDL : Vector Pairwise Add Long
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
+ int_arm_neon_vpaddls>;
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
+ int_arm_neon_vpaddlu>;
+
+// VPADAL : Vector Pairwise Add and Accumulate Long
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
+ int_arm_neon_vpadals>;
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
+ int_arm_neon_vpadalu>;
+
+// VPMAX : Vector Pairwise Maximum
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+
+// VPMIN : Vector Pairwise Minimum
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+
+// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
+
+// VRECPE : Vector Reciprocal Estimate
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe", "u32",
+ v2i32, v2i32, int_arm_neon_vrecpe>;
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe", "u32",
+ v4i32, v4i32, int_arm_neon_vrecpe>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
+
+// VRECPS : Vector Reciprocal Step
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
+
+// VRSQRTE : Vector Reciprocal Square Root Estimate
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte", "u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte", "u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
+
+// VRSQRTS : Vector Reciprocal Square Root Step
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+
+// Vector Shifts.
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu>;
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
+ N2RegVShLFrm>;
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
+ N2RegVShRFrm>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
+ N2RegVShRFrm>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+ ValueType OpTy, SDNode OpNode>
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, OpNode> {
+ let Inst{21-16} = op21_16;
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+ v8i16, v8i8, NEONvshlli>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+ v4i32, v4i16, NEONvshlli>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+ v2i64, v2i32, NEONvshlli>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
+ N2RegVShRFrm>;
+defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
+ N2RegVShRFrm>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+ NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
+ N2RegVShLFrm>;
+defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
+ N2RegVShLFrm>;
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
+ N2RegVShLFrm>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+ NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+ NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+ NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
+
+// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
+ NEONvqrshrns>;
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
+ NEONvqrshrnu>;
+
+// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
+ NEONvqrshrnsu>;
+
+// VSRA : Vector Shift Right and Accumulate
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+// VRSRA : Vector Rounding Shift Right and Accumulate
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+
+// VSLI : Vector Shift Left and Insert
+defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
+// VSRI : Vector Shift Right and Insert
+defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
+
+// Vector Absolute and Saturating Absolute.
+
+// VABS : Vector Absolute Value
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
+ int_arm_neon_vabs>;
+def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAD, "vabs", "f32",
+ v2f32, v2f32, int_arm_neon_vabs>;
+def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAQ, "vabs", "f32",
+ v4f32, v4f32, int_arm_neon_vabs>;
+
+// VQABS : Vector Saturating Absolute Value
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
+ int_arm_neon_vqabs>;
+
+// Vector Negate.
+
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+ int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+ int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+ int_arm_neon_vclz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt", "8",
+ v8i8, v8i8, int_arm_neon_vcnt>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt", "8",
+ v16i8, v16i8, int_arm_neon_vcnt>;
+
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+
+let neverHasSideEffects = 1 in {
+def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
+ N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+ let Vn{4-0} = Vm{4-0};
+}
+def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+ let Vn{4-0} = Vm{4-0};
+}
+
+// Pseudo vector move instructions for QQ and QQQQ registers. This should
+// be expanded after register allocation is completed.
+def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
+ NoItinerary, []>;
+
+def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
+ NoItinerary, []>;
+} // neverHasSideEffects
+
+// VMOV : Vector Move (Immediate)
+
+let isReMaterializable = 1 in {
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+} // isReMaterializable
+
+// VMOV : Vector Get Lane (move scalar to ARM core register)
+
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
+def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+ (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+ (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+ (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane))>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+
+
+// VMOV : Vector Set Lane (move ARM core register to scalar)
+
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+}
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+
+//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+
+// VDUP : Vector Duplicate (from ARM core register to all elements)
+
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
+
+def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", "32", "$V, $R",
+ [(set DPR:$V, (v2f32 (NEONvdup
+ (f32 (bitconvert GPR:$R)))))]>;
+def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", "32", "$V, $R",
+ [(set QPR:$V, (v4f32 (NEONvdup
+ (f32 (bitconvert GPR:$R)))))]>;
+
+// VDUP : Vector Duplicate Lane (from scalar to all elements)
+
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ imm:$lane)))]>;
+
+// Inst{19-16} is partially specified depending on the element size.
+
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
+ let Inst{19} = lane{0};
+}
+
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+// VMOVN : Vector Narrowing Move
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
+// VQMOVN : Vector Saturating Narrowing Move
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
+// VMOVL : Vector Lengthening Move
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+
+// Vector Conversions.
+
+// VCVT : Vector Convert Between Floating-Point and Integers
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v4i32, v4f32, fp_to_sint>;
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v4i32, v4f32, fp_to_uint>;
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v4f32, v4i32, sint_to_fp>;
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v4f32, v4i32, uint_to_fp>;
+
+// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def VEXTdf : VEXTd<"vext", "32", v2f32> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def VEXTqf : VEXTq<"vext", "32", v4f32> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ NVTBLFrm, IIC_VTB4,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ NVTBLFrm, IIC_VTBX3,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "$orig = $Vd", []>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+ IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 0000000..826ef46
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1550 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
+// Scaled 4 immediate.
+def t_imm_s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+}
+
+// Define Thumb specific addressing modes.
+
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+}
+
+def MemModeRegThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeRegThumb";
+ let SuperClasses = [];
+}
+
+def MemModeImmThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeImmThumb";
+ let SuperClasses = [];
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg
+//
+def t_addrmode_rrs1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def tADJCALLSTACKUP :
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+def tADJCALLSTACKDOWN :
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+// T1Disassembly - A simple class to make encoding some disassembly patterns
+// easier and less verbose.
+class T1Disassembly<bits<2> op1, bits<8> op2>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = op1;
+ let Inst{7-0} = op2;
+}
+
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x00>; // A8.6.110
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x10>; // A8.6.410
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x20>; // A8.6.408
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x30>; // A8.6.409
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x40>; // A8.6.157
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
+
+def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = 1; // Big-Endian
+ let Inst{2-0} = 0b000;
+}
+
+def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = 0; // Little-Endian
+ let Inst{2-0} = 0b000;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
+}
+
+// For both thumb1 and thumb2.
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
+}
+
+// PC relative add (ADR).
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, pc, $rhs", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ // A6.2 & A8.6.10
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
+
+// ADD <Rd>, sp, #<imm8>
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $sp, $rhs", []>,
+ T1Encoding<{1,0,1,0,1,?}> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
+
+// ADD sp, sp, #<imm7>
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $rhs", []>,
+ T1Misc<{0,0,0,0,0,?,?}> {
+ // A6.2.5 & A8.6.8
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
+
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "sub\t$dst, $rhs", []>,
+ T1Misc<{0,0,0,0,1,?,?}> {
+ // A6.2.5 & A8.6.214
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
+
+// ADD <Rm>, sp
+def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add\t$dst, $rhs", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T1
+ bits<4> dst;
+ let Inst{7} = dst{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = dst{2-0};
+}
+
+// ADD sp, <Rm>
+def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add\t$dst, $rhs", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T2
+ bits<4> dst;
+ let Inst{7} = 1;
+ let Inst{6-3} = dst;
+ let Inst{2-0} = 0b101;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr",
+ [(ARMretflag)]>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ let Inst{6-3} = 0b1110; // Rm = lr
+ let Inst{2-0} = 0b000;
+ }
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
+ IIC_Br, "bx\t$Rm",
+ []>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ }
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : TI<(outs), (ins GPR:$Rm),
+ IIC_Br,
+ "mov\tpc, $Rm",
+ [(brind GPR:$Rm)]>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rm;
+ let Inst{7} = 1; // <Rd> = Inst{7:2-0} = pc
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b111;
+ }
+}
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop_Br,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ // A8.6.121
+ bits<16> regs;
+ let Inst{8} = regs{15}; // registers = P:'0000000':register_list
+ let Inst{7-0} = regs{7-0};
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [SP] in {
+ // Also used for Thumb2
+ def tBL : TIx2<0b11110, 0b11, 1,
+ (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
+ "bl\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<0b11110, 0b11, 0,
+ (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ "blx\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24;
+
+ // ARMv4T
+ // FIXME: Should be a pseudo.
+ let isCodeGenOnly = 1 in
+ def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?,
+ (outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ "mov\tlr, pc\n\tbx\t$func",
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ // Also used for Thumb2
+ def tBLr9 : TIx2<0b11110, 0b11, 1,
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ IIC_Br, "bl${p}\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ IIC_Br, "blx${p}\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>,
+ T1Special<{1,1,1,?}> {
+ // A6.2.3 & A8.6.24
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ let isCodeGenOnly = 1 in
+ // FIXME: Should be a pseudo.
+ def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?,
+ (outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ "mov\tlr, pc\n\tbx\t$func",
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ }
+
+ // Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
+ let Defs = [LR] in
+ def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target),
+ Size4Bytes, IIC_Br, []>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ T1Encoding<{1,1,0,1,?,?}> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br,
+ "cbnz\t$cmp, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+}
+
+// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16 {
+ bits<8> imm;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
+}
+
+// The assembler uses 0xDEFE for a trap instruction.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ "trap", [(trap)]>, Encoding16 {
+ let Inst = 0xdefe;
+}
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+// Loads: reg/reg and reg/imm5
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+
+let AddedComplexity = 10 in
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
+ "ldrsb", "\t$dst, $addr",
+ [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+let AddedComplexity = 10 in
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
+ "ldrsh", "\t$dst, $addr",
+ [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Special instruction for restore. It cannot clobber condition register
+// when it's expanded by eliminateCallFramePseudoInstr().
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
+// FIXME: Pseudo for tLDRspi
+def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$dst, $addr", []>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Load tconstpool
+// FIXME: Use ldr.n to work around a Darwin assembler bug.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", ".n\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in
+// Special instruction for spill. It cannot clobber condition register when it's
+// expanded by eliminateCallFramePseudoInstr().
+// FIXME: Pseudo for tSTRspi
+def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$src, $addr", []>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bits<6> T1Enc,
+ bit L_bit> {
+ def IA :
+ T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+ def IA_UPD :
+ T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+}
+
+// These require base address to be written back or one of the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
+ {1,1,0,0,1,?}, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
+ {1,1,0,0,0,?}, 0>;
+
+} // neverHasSideEffects
+
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
+
+// Add with carry register
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+
+// Add immediate
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+
+// Add register
+let isCommutable = 1 in
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+
+let neverHasSideEffects = 1 in
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+// AND register
+let isCommutable = 1 in
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+
+// ASR immediate
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// ASR register
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+
+// BIC register
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+
+// CMN register
+let isCompare = 1, Defs = [CPSR] in {
+//FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
+
+// CMP immediate
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
+}
+
+// CMP register
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
+}
+} // isCompare = 1, Defs = [CPSR]
+
+
+// XOR register
+let isCommutable = 1 in
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSL immediate
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSL register
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSR immediate
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSR register
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
+
+// TODO: A7-73: MOV(2) - mov setting flag.
+
+let neverHasSideEffects = 1 in {
+// FIXME: Make this predicable.
+def tMOVr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<0b1000> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ // Bits {7-6} are encoded by the T1Special value.
+ let Inst{5-3} = Rm{2-0};
+ let Inst{2-0} = Rd{2-0};
+}
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16 {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// FIXME: Make these predicable.
+def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<{1,0,0,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ // Bit {7} is encoded by the T1Special value.
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<{1,0,?,0}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ // Bit {6} is encoded by the T1Special value.
+ let Inst{7} = Rd{3};
+ let Inst{5-3} = Rm{2-0};
+ let Inst{2-0} = Rd{2-0};
+}
+def tMOVgpr2gpr : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+} // neverHasSideEffects
+
+// Multiply register
+let isCommutable = 1 in
+def tMUL : // A8.6.105 T1
+ T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMUL32,
+ "mul", "\t$Rdn, $Rm, $Rdn",
+ [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
+let isCommutable = 1 in
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd,
+ (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd,
+ (sext_inreg
+ (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+ (shl tGPR:$Rm, (i32 8))), i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+
+// Subtract with carry register
+let Uses = [CPSR] in
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+
+// Subtract immediate
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+
+// TODO: A7-96: STMIA - store multiple.
+
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
+ def tMOVCCr_pseudo :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ NoItinerary,
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+
+// 16-bit movcc in IT blocks for Thumb2.
+let neverHasSideEffects = 1 in {
+def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr,
+ "mov", "\t$Rdn, $Rm", []>,
+ T1Special<{1,0,?,?}> {
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+let isMoveImm = 1 in
+def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi,
+ "mov", "\t$Rdn, $Rm", []>,
+ T1General<{1,0,0,?,?}> {
+ bits<3> Rdn;
+ bits<8> Rm;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = Rm;
+}
+
+} // neverHasSideEffects
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ Size2Bytes, IIC_iALUi, []>;
+
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Size2Bytes, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class tMovRCopro<string opc, bit direction>
+ : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
+def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+
+class tMovRRCopro<string opc, bit direction>
+ : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>;
+def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions. For disassembly only.
+//
+def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1, Defs = [R0, LR], Uses = [SP] in
+def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
+ "bl\t__aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]> {
+ // Encoding is 0xf7fffffe.
+ let Inst = 0xf7fffffe;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everthing out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+
+// ConstantPool, GlobalAddress
+def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// Indirect calls to ARM routines
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+ (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
+
+// extload -> zextload
+def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+
+// If it's impossible to use [r,r] address mode for sextload, select to
+// ldr{b|h} + sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : T1Pat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : T1Pat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary,
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb, IsThumb1Only]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 0000000..0e01be5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,3432 @@
+//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// IT block predicate field
+def it_pred : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+}
+
+// IT block condition mask
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+}
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
+ let PrintMethod = "printT2SOOperand";
+ let MIOperandInfo = (ops rGPR, i32imm);
+}
+
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> {
+ let EncoderMethod = "getT2SOImmOpValue";
+}
+
+// t2_so_imm_not - Match an immediate that is a complement
+// of a t2_so_imm.
+def t2_so_imm_not : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM>;
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_neg_XFORM>;
+
+// Break t2_so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12]
+// to get the first/second pieces.
+def t2_so_imm2part : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue());
+ }]> {
+}
+
+def t2_so_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def t2_so_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue());
+ }]> {
+}
+
+def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
+def imm1_31 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
+def imm0_4095 : Operand<i32>,
+ PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 4096;
+}]>;
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM>;
+
+def imm0_255_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 255;
+}], imm_neg_XFORM>;
+
+def imm0_255_not : PatLeaf<(i32 imm), [{
+ return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+ let PrintMethod = "printAddrModeImm12Operand";
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+}
+
+
+// t2addrmode_imm8 := reg +/- imm8
+def t2addrmode_imm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+def t2am_imm8_offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def t2addrmode_imm8s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4Operand";
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+def t2am_imm8s4_offset : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+}
+
+// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+ let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0, string wide = ""> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+// the ".w" prefix to indicate that they are wide.
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">;
+
+/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // shifted imm
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2ThreeReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
+
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+ }
+ }
+ // 12-bit imm
+ def ri12 : T2I<
+ (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ }
+}
+
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and use the carry
+/// bit. It's not predicable.
+let Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+
+// Carry setting variants
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
+/// version is not needed since this is only for codegen.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // shifted register
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
+
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+// rotate operation that produces a value.
+multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
+ // 5-bit imm
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-21} = 0b010010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-21} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0000;
+ }
+}
+
+/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
+ // shifted imm
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
+ opc, ".w\t$lhs, $rhs",
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+}
+
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 1;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+
+ // FIXME: Is the pci variant actually needed?
+ def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = ?; // add = (U == '1')
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = 0b1111; // Rn
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-0} = addr{11-0};
+ }
+}
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0001;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+}
+
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
+ [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn,
+ (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+// DO variant - disassembly only, no pattern
+
+multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b10;
+ // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
+ let Inst{22} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ Size4Bytes, IIC_iALUi, []>;
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Size4Bytes, IIC_iALUi,
+ []>;
+
+
+// FIXME: None of these add/sub SP special instructions should be necessary
+// at all for thumb2 since they use the same encodings as the generic
+// add/sub instructions. In thumb1 we need them since they have dedicated
+// encodings. At the least, they should be pseudo instructions.
+// ADD r, sp, {so_imm|i12}
+let isCodeGenOnly = 1 in {
+def t2ADDrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b1000;
+ let Inst{15} = 0;
+}
+def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
+ IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-20} = 0b100000;
+ let Inst{15} = 0;
+}
+
+// ADD r, sp, so_reg
+def t2ADDrSPs : T2sTwoRegShiftedReg<
+ (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{15} = 0;
+}
+
+// SUB r, sp, {so_imm|i12}
+def t2SUBrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b1101;
+ let Inst{15} = 0;
+}
+def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
+ IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-20} = 0b101010;
+ let Inst{15} = 0;
+}
+
+// SUB r, sp, so_reg
+def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
+ IIC_iALUsi,
+ "sub", "\t$Rd, $Rn, $imm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1101;
+ let Inst{19-16} = 0b1101; // Rn = sp
+ let Inst{15} = 0;
+}
+} // end isCodeGenOnly = 1
+
+// Signed and unsigned division on v7-M
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi8 node:$Src)>>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_imm8:$addr),
+ (t2LDRHi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+ (t2LDRHpci tconstpool:$addr)>;
+
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
+// Indexed loads
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+
+def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
+ []>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
+// for disassembly only.
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+
+// Store
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
+
+// Indexed stores
+def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+ [(set GPR:$base_wb,
+ (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+ [(set GPR:$base_wb,
+ (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
+
+// ldrd / strd pre / post variants
+// For disassembly only.
+
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
+
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+
+def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
+
+// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
+// data/instruction access. These are for disassembly only.
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
+
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
+ }
+
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0000000;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
+ }
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0110;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+ UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
+ UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+
+defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
+
+// TODO: SXT(A){B|H}16 - done for disassembly only
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+ UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
+ UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
+
+// RSB
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity preferences the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
+ (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// And Miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>;
+
+// Signed/Unsigned saturate -- for disassembly only
+
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm{4-0};
+ let Inst{21} = sh{6};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2USAT16: T2SatI<
+ (outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
+ "usat16", "\t$dst, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate Instructions.
+//
+
+defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+let Uses = [CPSR] in {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0011;
+}
+}
+
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b01; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b10; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+
+defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+}
+
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10100;
+ let Inst{15} = 0;
+}
+
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b11100;
+ let Inst{15} = 0;
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding T1)
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+
+ // GNU as only supports this form of bfi (w/ 4 arguments)
+ let isAsmParserOnly = 1 in
+ def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
+ width_imm:$width),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
+ []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+
+ bits<5> lsbit;
+ bits<5> width;
+ let msb{4-0} = width; // Custom encoder => lsb+width-1
+ let lsb{4-0} = lsbit;
+ }
+}
+
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
+
+// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1>;
+
+
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+let isCommutable = 1 in
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0001; // Multiply and Subtract
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$Rd, rGPR:$Ra),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // isCommutable
+
+// Multiply + accumulate
+def t2SMLAL : T2MulLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMLAL : T2MulLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // neverHasSideEffects
+
+// Rounding variants of the below included for disassembly only
+
+// Most significant word multiply
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// These are for disassembly only.
+
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-22} = 0b01010;
+ let Inst{21-20} = op1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
+}
+
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd,
+ (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
+
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd,
+ (sext_inreg
+ (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+ (shl rGPR:$Rm, (i32 8))), i16))]>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 0; // BT form
+ let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 1; // TB form
+ let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPR:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs),
+ (t2CMPrr GPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>;
+
+//FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
+
+defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def t2MOVCCr : T2TwoReg<
+ (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr,
+ "mov", ".w\t$Rd, $Rm",
+ [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mov", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+ IIC_iCMOVi,
+ "movw", "\t$Rd, $imm", []>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0011;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod; // Shift type.
+}
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
+}
+}
+
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dsb", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option -- for disassembly only
+def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-4} = 0xf3bf8f6;
+ let Inst{3-0} = 0b1111;
+}
+
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+ let Inst{3-0} = 0b1111;
+
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+}
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+ Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]",
+ "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+ Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]",
+ "", []>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrex", "\t$Rt, [$Rn]", "",
+ []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+}
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, [$Rn]", "",
+ [], {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexb", "\t$Rd, $Rt, [$Rn]", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexh", "\t$Rd, $Rt, [$Rn]", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strex", "\t$Rd, $Rt, [$Rn]", "",
+ []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+}
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [],
+ {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+// Clear-Exclusive is for disassembly only.
+def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
+ "bl\t__aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b11;
+ let Inst{12} = 1;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everthing out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
+ D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+ D31 ], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: $dst1 should be a def. But the extra ops must be in the end of the
+// operand list.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ IIC_iLoad_mBr,
+ "ldmia${p}.w\t$Rn!, $regs",
+ "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b.w\t$target",
+ [(br bb:$target)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 1;
+
+ bits<20> target;
+ let Inst{26} = target{19};
+ let Inst{11} = target{18};
+ let Inst{13} = target{17};
+ let Inst{21-16} = target{16-11};
+ let Inst{10-0} = target{10-0};
+}
+
+let isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+
+// FIXME: Add a non-pc based case that can be predicated.
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbb", "\t[$Rn, $Rm]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+}
+
+def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
+}
+} // isNotDuplicable, isIndirectBranch
+
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
+}
+
+
+// IT block
+let Defs = [ITSTATE] in
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, Size2Bytes, IIC_iALUx,
+ "it$mask\t$cc", "", []> {
+ // 16-bit instruction.
+ let Inst{31-16} = 0x0000;
+ let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> func;
+ let Inst{19-16} = func;
+}
+
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op),
+ [/* For disassembly only; pattern left blank */]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+// Helper class for disassembly only.
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-4} = 0b1111;
+
+ bits<4> opt;
+ let Inst{3-0} = opt;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+
+ bits<4> opt;
+ let Inst{19-16} = opt;
+}
+
+class T2SRS<bits<12> op31_20,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<5> mode;
+ let Inst{4-0} = mode{4-0};
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2SRS<0b111010000010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSDB : T2SRS<0b111010000000,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIAW : T2SRS<0b111010011010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIA : T2SRS<0b111010011000,
+ (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Return From Exception is a system instruction -- for disassembly only
+
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+}
+
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+}
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+ let Inst{15-14} = op15_14{1-0};
+ let Inst{12} = op12{0};
+}
+
+class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = 0b1111;
+}
+
+def t2MRS : T2MRS<0b111100111110, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]>;
+def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
+ 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{13} = 0b0;
+ let Inst{11-8} = mask{3-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class t2MovRCopro<string opc, bit direction>
+ : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>;
+def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>;
+
+class t2MovRRCopro<string opc, bit direction>
+ : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def t2MCRR2 : t2MovRRCopro<"mcrr2",
+ 0 /* from ARM core register to coprocessor */>;
+def t2MRRC2 : t2MovRRCopro<"mrrc2",
+ 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions. For disassembly only.
+//
+
+def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000..920c5c9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,1129 @@
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf32ImmOperand";
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM::getVFPf64Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf64ImmOperand";
+}
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ [(store SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+} // neverHasSideEffects
+
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // Defs = [FPSCR]
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let Defs = [FPSCR] in {
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // Defs = [FPSCR]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+// Special case encoding: bits 11-8 is 0b1011.
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-6} = 0b11;
+ let Inst{4} = 0;
+}
+
+// Between half-precision and single-precision. For disassembly only.
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
+
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
+
+let neverHasSideEffects = 1 in {
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
+ [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+}
+
+def VMOVRRS : AVConv3I<0b11000101, 0b1010,
+ (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{7-6} = 0b00;
+}
+} // neverHasSideEffects
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+}
+
+let neverHasSideEffects = 1 in
+def VMOVSRR : AVConv5I<0b11000100, 0b1010,
+ (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{7-6} = 0b00;
+}
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX: SPR system reg -> GPR
+// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ let Inst{7} = 1; // s32
+}
+
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ let Inst{7} = 0; // u32
+}
+
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+}
+
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
+// FIXME: Marking these as codegen only seems wrong. They are real
+// instructions(?)
+let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+
+// FP to Fixed-Point:
+
+def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+
+//===----------------------------------------------------------------------===//
+// FP FMA Operations.
+//
+
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+let neverHasSideEffects = 1 in {
+def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm",
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm",
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
+
+def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd"> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Move from VFP System Register to ARM core register.
+//
+
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
+ let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
+}
+
+// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
+// to APSR.
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
+ let Inst{27-20} = 0b11101110;
+ let Inst{19-16} = opc19_16;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
+
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_fpUNA64,
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+ let Inst{19} = imm{31};
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision.
+ let Inst{7-4} = 0b0000;
+}
+
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{19} = imm{31}; // The immediate is handled as a double.
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision.
+ let Inst{7-4} = 0b0000;
+}
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 0000000..45b7e48
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,336 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
+// to preserve more context and manipulate the stack directly). Instead,
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback();
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save caller saved registers since they may contain stuff
+ // for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned, so we can't just preserve the callee saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry.
+ // pass it as the argument to the C part of the callback
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+ // 6-20 | D0..D7 | Saved VFP registers
+ // +--------+
+ //
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ // Restore VFP caller-saved registers.
+ "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, with two
+ // updating load instructions.
+ "ldmia sp!, {r0, r1, r2, r3, lr}\n"
+ "ldr pc, [sp], #4\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
+ void *PtrAddr = JCE.allocIndirectGV(
+ GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ void *Addr;
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16-byte size and 4-aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ DEBUG(if (F)
+ errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
+ << "] for GV '" << F->getName() << "'\n";
+ else
+ errs() << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n");
+ }
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ } else {
+ // The stub is 8-byte size and 4-aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16-byte size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+
+ return Addr;
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the immed value calculated.
+ // VFP immediate offset is multiplied by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC.
+ *((intptr_t*)RelocPos) |=
+ getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+ // It is necessary to calculate the correct value of signed_immed_24
+ // field. We subtract the base addr from the target addr to form a
+ // byte offset, which must be inside the range -33554432 and +33554428.
+ // Then, we set the signed_immed_24 field of the instruction to bits
+ // [25:2] of the byte offset. More details ARM-ARM p. A4-11.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+ assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 0000000..2f97928
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,183 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
+ /// jump table ids to jump table bases map; remember if codegen relocation
+ /// model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+ /// getJumpTableBaseAddr - The ARM target inline all jump tables within
+ /// text section of the function. This returns the address of the base of
+ /// the jump table of the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+ /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
+ /// specified symbol located at address. Returns 0 if the indirect symbol
+ /// has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+ /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+ /// if it's not already solved. Constantpool entries must be resolved by
+ /// ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 0000000..d9dc5cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,1838 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+
+/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
+/// load / store instructions to form ldm / stm instructions.
+
+namespace {
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+ bool isThumb2;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Reg;
+ bool isKill;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ void MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &MemOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ break;
+ case ARM::STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ break;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ break;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ break;
+ case ARM::VLDRS:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return ARM::VLDMSDB;
+ }
+ break;
+ case ARM::VSTRS:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return ARM::VSTMSDB;
+ }
+ break;
+ case ARM::VLDRD:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return ARM::VLDMDDB;
+ }
+ break;
+ case ARM::VSTRD:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return ARM::VSTMDDB;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+namespace llvm {
+ namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+
+ return ARM_AM::bad_am_submode;
+}
+
+ } // end namespace ARM_AM
+} // end namespace llvm
+
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STRi12 || isT2i32Store(Opc);
+}
+
+/// MergeOps - Create and insert a LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill,
+ int Opcode, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned Scratch, DebugLoc dl,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
+ Mode = ARM_AM::ib;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ Mode = ARM_AM::da;
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // If starting offset isn't zero, insert a MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (isi32Load(Opcode))
+ // If it is a load, then just use one of the destination register to
+ // use as the new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Use the scratch register to use as a new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = !isThumb2
+ ? ARM::ADDri
+ : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
+ if (Offset < 0) {
+ BaseOpc = !isThumb2
+ ? ARM::SUBri
+ : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
+ Offset = - Offset;
+ }
+ int ImmedOffset = isThumb2
+ ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ // FIXME: Try t2ADDri12 or t2SUBri12?
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // New base is always killed right its use.
+ }
+
+ bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VLDRD);
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ return true;
+}
+
+// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
+// success.
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ // First calculate which of the registers should be killed by the merged
+ // instruction.
+ const unsigned insertPos = memOps[insertAfter].Position;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+ if (i == memOpsBegin) {
+ i = memOpsEnd;
+ if (i == e)
+ break;
+ }
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ unsigned Reg = memOps[i].Reg;
+ // If we are inserting the merged operation after an operation that
+ // uses the same register, make sure to transfer any kill flag.
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ // Try to do the merge.
+ MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
+ ++Loc;
+ if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
+ Pred, PredReg, Scratch, dl, Regs))
+ return;
+
+ // Merge succeeded, update records.
+ Merges.push_back(prior(Loc));
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+ assert(Idx >= 0 && "Cannot find killing operand");
+ memOps[j].MBBI->getOperand(Idx).setIsKill(false);
+ memOps[j].isKill = false;
+ }
+ memOps[i].isKill = true;
+ }
+ MBB.erase(memOps[i].MBBI);
+ // Update this memop to refer to the merged instruction.
+ // We may need to move kill flags again.
+ memOps[i].Merged = true;
+ memOps[i].MBBI = Merges.back();
+ memOps[i].Position = insertPos;
+ }
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+void
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned insertAfter = SIndex;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ DebugLoc dl = Loc->getDebugLoc();
+ const MachineOperand &PMO = Loc->getOperand(0);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(PReg);
+ unsigned Count = 1;
+
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(Reg);
+ // Register numbers must be in ascending order. For VFP, the registers
+ // must also be consecutive and there is a limit of 16 double-word
+ // registers per instruction.
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
+ ((isNotVFP && RegNum > PRegNum)
+ || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
+ Offset += Size;
+ PRegNum = RegNum;
+ ++Count;
+ } else {
+ // Can't merge this in. Try merge the earlier ones first.
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
+ Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+ MemOps, Merges);
+ return;
+ }
+
+ if (MemOps[i].Position > MemOps[insertAfter].Position)
+ insertAfter = i;
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
+ Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ return;
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::t2SUBrSPi &&
+ MI->getOpcode() != ARM::t2SUBrSPi12 &&
+ MI->getOpcode() != ARM::tSUBspi &&
+ MI->getOpcode() != ARM::SUBri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::t2ADDrSPi &&
+ MI->getOpcode() != ARM::t2ADDrSPi12 &&
+ MI->getOpcode() != ARM::tADDspi &&
+ MI->getOpcode() != ARM::ADDri)
+ return false;
+
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ // Make sure the offset fits in 8 bits.
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ break;
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ break;
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ break;
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ bool BaseKill = MI->getOperand(0).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_PRE;
+ case ARM::STRi12:
+ return ARM::STR_PRE;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_POST;
+ case ARM::STRi12:
+ return ARM::STR_POST;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ // AM2 - 12 bits, thumb2 - 8 bits.
+ unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (!isAM5 &&
+ isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
+ MBB.erase(PrevMBBI);
+ }
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (!isAM5 &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned Offset = 0;
+ if (isAM2)
+ Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ else if (!isAM5)
+ Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+
+ if (isAM5) {
+ // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2)
+ // LDR_PRE, LDR_POST,
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM2)
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if instruction is a memory operations (that this
+/// pass is capable of operating on).
+static bool isMemoryOp(const MachineInstr *MI) {
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return MI->getOperand(1).isReg();
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Offset, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ bool OffKill, bool OffUndef,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isT2) {
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ return false;
+
+ MachineBasicBlock::iterator NewBBI = MBBI;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ NewBBI = llvm::prior(MBBI);
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ } else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
+ }
+ return false;
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+ SmallVector<MachineBasicBlock::iterator,4> Merges;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ int Offset = getMemoryOpOffset(MBBI);
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
+ ++Position;
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register.
+ unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ Merges.clear();
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps, Merges);
+
+ // Try folding preceeding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+ if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
+ ++NumMerges;
+ NumMerges += Merges.size();
+
+ // Try folding preceeding/trailing base inc/dec into those load/store
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ ++NumMerges;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ } else if (NumMemOps == 1) {
+ // Try folding preceeding/trailing base inc/dec into the single
+ // load/store.
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ ++NumMerges;
+ RS->forward(prior(MBBI));
+ }
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
+/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
+/// directly restore the value of LR into pc.
+/// ldmfd sp!, {..., lr}
+/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
+/// =>
+/// ldmfd sp!, {..., pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
+ MachineInstr *PrevMI = prior(MBBI);
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+ isThumb2 = AFI->isThumb2Function();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
+/// load / stores from consecutive locations close to make it more
+/// likely they will be combined later.
+
+namespace {
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetData *TD;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ MachineRegisterInfo *MRI;
+ MachineFunction *MF;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getTargetData();
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ MRI = &Fn.getRegInfo();
+ MF = &Fn;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
+ while (++I != E) {
+ if (I->isDebugValue() || MemOps.count(&*I))
+ continue;
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
+ return false;
+ if (isLd && TID.mayStore())
+ return false;
+ if (!isLd) {
+ if (TID.mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (TID.mayStore())
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
+ return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
+ }
+ }
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // Ok if we are moving small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset, unsigned &PredReg,
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
+ unsigned Scale = 1;
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDRi12)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STRi12)
+ NewOpc = ARM::STRD;
+ else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else
+ return false;
+
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !(*Op0->memoperands_begin())->getValue() ||
+ (*Op0->memoperands_begin())->isVolatile())
+ return false;
+
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ const Function *Func = MF->getFunction();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 need 8-byte align
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ if (isT2) {
+ if (OffImm < 0) {
+ if (OffImm < -255)
+ // Can't fall back to t2LDRi8 / t2STRi8.
+ return false;
+ } else {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ }
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+ }
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ Pred = llvm::getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
+ return true;
+}
+
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastOpcode = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ unsigned Opcode = Op->getOpcode();
+ if (LastOpcode && Opcode != LastOpcode)
+ break;
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ LastOpcode = Opcode;
+ if (++NumMove == 8) // FIXME: Tune this limit.
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
+
+ // Be conservative, if the instructions are too far apart, don't
+ // move them. We want to limit the increase of register pressure.
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
+ if (DoMove)
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ ++InsertPos;
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
+ unsigned NewOpc = 0;
+ int Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg,
+ Offset, PredReg, Pred, isT2)) {
+ Ops.pop_back();
+ Ops.pop_back();
+
+ // Form the pair instruction.
+ if (isLd) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
+ dl, TII->get(NewOpc))
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming LDRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumLDRDFormed;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
+ dl, TII->get(NewOpc))
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming STRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.isCall() || TID.isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opc = MI->getOpcode();
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that's seen earlier).
+ // Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.cpp
new file mode 100644
index 0000000..53edfca
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.cpp
@@ -0,0 +1,68 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+using namespace llvm;
+
+static const char *const arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+ AsmTransCBE = arm_asm_table;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::SjLj;
+}
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ Data64bitsDirective = 0;
+ CommentString = "@";
+
+ HasLEB128 = true;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ HasLCOMMDirective = true;
+
+ DwarfRequiresFrameSection = false;
+
+ SupportsDebugInformation = true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.h
new file mode 100644
index 0000000..90f7822
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit ARMMCAsmInfoDarwin();
+ };
+
+ struct ARMELFMCAsmInfo : public MCAsmInfo {
+ explicit ARMELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
new file mode 100644
index 0000000..6d7b485
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1230 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "ARMInstrInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const ARMSubtarget *Subtarget;
+ MCContext &Ctx;
+
+public:
+ ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+ : TM(tm), TII(*TM.getInstrInfo()),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) {
+ }
+
+ ~ARMMCCodeEmitter() {}
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups)const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ switch (MI.getOperand(Op).getImm()) {
+ default: assert (0 && "Not a valid rot_imm value!");
+ case 0: return 0;
+ case 8: return 1;
+ case 16: return 2;
+ case 24: return 3;
+ }
+ }
+
+ unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 1;
+ }
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(TM, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = getARMRegisterNumbering(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN)
+ SImm = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+}
+
+/// Return true if this branch has a non-always predication
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (Subtarget->isThumb2())
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+}
+
+
+
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Val =
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+}
+
+/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+
+ MCFixupKind Kind;
+ if (Subtarget->isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+// FIXME: This routine assumes that a binary
+// expression will always result in a PCRel expression
+// In reality, its only true if one or more subexpressions
+// is itself a PCRel (i.e. "." in asm or some other pcrel construct)
+// but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ default: assert(0 && "Unexpected expression type");
+ case MCExpr::SymbolRef: return false;
+ case MCExpr::Binary: return true;
+ }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
+ const MCExpr *E = MO.getExpr();
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ MCFixupKind Kind;
+ switch (ARM16Expr->getKind()) {
+ default: assert(0 && "Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movt_hi16_pcrel
+ : ARM::fixup_arm_movt_hi16_pcrel);
+ else
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+ Fixups.push_back(MCFixup::Create(0, E, Kind));
+ return 0;
+ };
+
+ llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (Subtarget->isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is either Rs, the amount to shift by, or reg0 in which
+ // case the imm contains the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 1 if reg shift, 0 if imm shift
+ // {6-5} = type
+ // If reg shift:
+ // {11-8} = Rs
+ // {7} = 0
+ // else (imm shift)
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ Value <<= 4;
+ Value |= getARMRegisterNumbering(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // 10 bits. lower 5 bits are are the lsb of the mask, high five bits are the
+ // msb of the mask.
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = CountTrailingZeros_32(v);
+ uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // MSB - 5 bits.
+ uint32_t lsb = MI.getOperand(Op-1).getImm();
+ uint32_t width = MI.getOperand(Op).getImm();
+ uint32_t msb = lsb+width-1;
+ assert (width != 0 && msb < 32 && "Illegal bit width!");
+ return msb;
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = ARM::SPRRegClass.contains(Reg);
+ bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return MO.getReg();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Pseudo instructions don't get encoded.
+ const TargetInstrDesc &Desc = TII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+ int Size;
+ // Basic size info comes from the TSFlags field.
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: llvm_unreachable("Unexpected instruction size!");
+ case ARMII::Size2Bytes: Size = 2; break;
+ case ARMII::Size4Bytes: Size = 4; break;
+ }
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
+ if (Subtarget->isThumb() && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
new file mode 100644
index 0000000..2727ba8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ assert(0 && "Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
new file mode 100644
index 0000000..d42f766
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const ARMMCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
new file mode 100644
index 0000000..59d6050
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -0,0 +1,115 @@
+//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower ARM MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMMCExpr.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ ARMAsmPrinter &Printer) {
+ MCContext &Ctx = Printer.OutContext;
+ const MCExpr *Expr;
+ switch (MO.getTargetFlags()) {
+ default: {
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on symbol operand");
+ case 0:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ Expr = ARMMCExpr::CreateLower16(Expr, Ctx);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ Expr = ARMMCExpr::CreateUpper16(Expr, Ctx);
+ break;
+ }
+ break;
+ }
+
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+
+}
+
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000..138f0c2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,250 @@
+//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialized Align, so must precede it.
+ bool isThumb;
+
+ /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+ /// to determine if function is compiled under Thumb mode, for that use
+ /// 'isThumb'.
+ bool hasThumb2;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
+ /// LRSpilledForFarJump - True if the LR register has been for spilled to
+ /// enable far jump.
+ bool LRSpilledForFarJump;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+ unsigned PICLabelUId;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
+ /// CPEClones - Track constant pool entries clones created by Constant Island
+ /// pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ hasThumb2(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ explicit ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
+ bool isThumb2Function() const { return isThumb && hasThumb2; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
+ }
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ assert(0 && "Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 0000000..edecc4b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using neon instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 0000000..ad51bc1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,40 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 0000000..8edfb9a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,33 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+public:
+ ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 0000000..22d15b5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,559 @@
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<6> num, string n> : Register<n> {
+ field bits<6> Num;
+ let Namespace = "ARM";
+}
+
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex; // In a Q reg.
+def ssub_3 : SubRegIndex;
+def ssub_4 : SubRegIndex; // In a QQ reg.
+def ssub_5 : SubRegIndex;
+def ssub_6 : SubRegIndex;
+def ssub_7 : SubRegIndex;
+def ssub_8 : SubRegIndex; // In a QQQQ reg.
+def ssub_9 : SubRegIndex;
+def ssub_10 : SubRegIndex;
+def ssub_11 : SubRegIndex;
+def ssub_12 : SubRegIndex;
+def ssub_13 : SubRegIndex;
+def ssub_14 : SubRegIndex;
+def ssub_15 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
+def D0 : ARMReg< 0, "d0", [S0, S1]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
+}
+
+// VFP3 defines 16 additional double registers
+def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
+def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">;
+def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">;
+def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">;
+def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">;
+def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">;
+def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
+def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
+
+// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+ (ssub_3 dsub_1, ssub_1)] in {
+def Q0 : ARMReg< 0, "q0", [D0, D1]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q2 : ARMReg< 2, "q2", [D4, D5]>;
+def Q3 : ARMReg< 3, "q3", [D6, D7]>;
+def Q4 : ARMReg< 4, "q4", [D8, D9]>;
+def Q5 : ARMReg< 5, "q5", [D10, D11]>;
+def Q6 : ARMReg< 6, "q6", [D12, D13]>;
+def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q8 : ARMReg< 8, "q8", [D16, D17]>;
+def Q9 : ARMReg< 9, "q9", [D18, D19]>;
+def Q10 : ARMReg<10, "q10", [D20, D21]>;
+def Q11 : ARMReg<11, "q11", [D22, D23]>;
+def Q12 : ARMReg<12, "q12", [D24, D25]>;
+def Q13 : ARMReg<13, "q13", [D26, D27]>;
+def Q14 : ARMReg<14, "q14", [D28, D29]>;
+def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
+let SubRegIndices = [qsub_0, qsub_1] in {
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
+ (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
+ (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
+}
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1] in {
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3),
+ (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1),
+ (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
+ (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
+ (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in
+{
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+}
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+}
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
+
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def FPEXC : ARMReg<8, "fpexc">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r9 == May be reserved as Thread Register
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R11, R12,
+ SP, LR, PC]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned ARM_GPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_GPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+ GPRClass::iterator
+ GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO;
+ return ARM_GPR_AO;
+ }
+
+ GPRClass::iterator
+ GPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
+ }
+ }];
+}
+
+// restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviours when PC
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R11, R12, LR]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ static const unsigned ARM_rGPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_rGPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+ rGPRClass::iterator
+ rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO;
+ return ARM_rGPR_AO;
+ }
+
+ rGPRClass::iterator
+ rGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
+ return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
+ }
+ }];
+}
+
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // R9 is available.
+ static const unsigned ARM_GPR_R9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R9, ARM::R12 };
+ // R9 is not available.
+ static const unsigned ARM_GPR_NOR9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_GPR_AO_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC;
+ return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
+ }
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+
+ return Subtarget.isTargetDarwin() ?
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
+ ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ }
+ }];
+}
+
+
+// Scalar single precision floating point register class..
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
+ S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32,
+ [S0, S1, S2, S3, S4, S5, S6, S7,
+ S8, S9, S10, S11, S12, S13, S14, S15]>;
+
+// Scalar double precision floating point / generic 64-bit vector register
+// class.
+// ARM requires only word alignment for double. It's more performant if it
+// is double-word alignment though.
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23,
+ D24, D25, D26, D27, D28, D29, D30, D31]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // VFP2 / VFPv3-D16
+ static const unsigned ARM_DPR_VFP2[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
+ // VFP3: D8-D15 are callee saved and should be allocated last.
+ // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
+ static const unsigned ARM_DPR_VFP3[] = {
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
+ ARM::D20, ARM::D21, ARM::D22, ARM::D23,
+ ARM::D24, ARM::D25, ARM::D26, ARM::D27,
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31,
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
+
+ DPRClass::iterator
+ DPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.hasVFP3() && !Subtarget.hasD16())
+ return ARM_DPR_VFP3;
+ return ARM_DPR_VFP2;
+ }
+
+ DPRClass::iterator
+ DPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.hasVFP3() && !Subtarget.hasD16())
+ return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
+ else
+ return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
+ }
+ }];
+}
+
+// Subset of DPR that are accessible with VFP2 (and so that also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15]> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1)];
+}
+
+// Subset of DPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ [D0, D1, D2, D3, D4, D5, D6, D7]> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
+}
+
+// Generic 128-bit vector register class.
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
+ Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1)];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Q4-Q7 are callee saved and should be allocated last.
+ // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+ static const unsigned ARM_QPR[] = {
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 };
+
+ QPRClass::iterator
+ QPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return ARM_QPR;
+ }
+
+ QPRClass::iterator
+ QPRClass::allocation_order_end(const MachineFunction &MF) const {
+ return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
+ }
+ }];
+}
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128,
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1)];
+}
+
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128,
+ [Q0, Q1, Q2, Q3]> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64],
+ 256,
+ [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR qsub_0, qsub_1)];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // QQ2-QQ3 are callee saved and should be allocated last.
+ // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+ static const unsigned ARM_QQPR[] = {
+ ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
+ ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
+
+ QQPRClass::iterator
+ QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return ARM_QQPR;
+ }
+
+ QQPRClass::iterator
+ QQPRClass::allocation_order_end(const MachineFunction &MF) const {
+ return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
+ }
+ }];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
+ 256,
+ [QQ0, QQ1, QQ2, QQ3]> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR_VFP2 qsub_0, qsub_1)];
+
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64],
+ 256,
+ [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+ dsub_4, dsub_5, dsub_6, dsub_7),
+ (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // QQQQ1 is callee saved and should be allocated last.
+ // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
+ static const unsigned ARM_QQQQPR[] = {
+ ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
+
+ QQQQPRClass::iterator
+ QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return ARM_QQQQPR;
+ }
+
+ QQQQPRClass::iterator
+ QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
+ return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
+ }
+ }];
+}
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMRelocations.h b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 0000000..86e7206
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,62 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
+ };
+ }
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 0000000..958c5c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,261 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
+
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 0000000..8d86c01
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,1034 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LSPipe : FuncUnit; // Load / store pipeline
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ //
+ // Push, def is the 3th operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
+
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 0000000..82c6735
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,1824 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
+// Reference Manual".
+//
+// Functional units
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Bypasses
+def A9_LdBypass : Bypass;
+
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
+ // Two fully-pipelined integer ALU pipelines
+
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
+
+ // VFP and NEON shares the same register file. This means that every VFP
+ // instruction should wait for full completion of the consecutive NEON
+ // instruction and vice-versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsVFP.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Load
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ // NEON
+ // VLD1
+ // FIXME: Conservatively assume insufficent alignment.
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 4, 3, 4, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 4, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 4, 3, 4, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 4, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 0000000..c1880a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,294 @@
+//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Model based on ARM1176
+//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
+//
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [], [
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
+
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 0000000..2b9202b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,134 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "ARMTargetMachine.h"
+using namespace llvm;
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+}
+
+ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
+}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferrably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 0000000..7533690
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,42 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSELECTIONDAGINFO_H
+#define ARMSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ ~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000..0bd740c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,252 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ReserveR9("arm-reserve-r9", cl::Hidden,
+ cl::desc("Reserve R9, making it unavailable as GPR"));
+
+static cl::opt<bool>
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+ cl::desc("Disallow all unaligned memory accesses"));
+
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
+ bool isT)
+ : ARMArchVersion(V4)
+ , ARMProcFamily(Others)
+ , ARMFPUType(None)
+ , UseNEONForSinglePrecisionFP(false)
+ , SlowFPVMLx(false)
+ , SlowFPBrcc(false)
+ , IsThumb(isT)
+ , ThumbMode(Thumb1)
+ , NoARM(false)
+ , PostRAScheduler(false)
+ , IsR9Reserved(ReserveR9)
+ , UseMovt(false)
+ , HasFP16(false)
+ , HasD16(false)
+ , HasHardwareDivide(false)
+ , HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , HasMPExtension(false)
+ , FPOnlySP(false)
+ , AllowsUnalignedMem(false)
+ , stackAlignment(4)
+ , CPUString("generic")
+ , TargetTriple(TT)
+ , TargetABI(ARM_ABI_APCS) {
+ // Default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
+
+ // Determine default and user specified characteristics
+
+ // When no arch is specified either by CPU or by attributes, make the default
+ // ARMv4T.
+ const char *ARMArchFeature = "";
+ if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
+ ARMArchVersion = V4T;
+ ARMArchFeature = ",+v4t";
+ }
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ unsigned Len = TT.length();
+ unsigned Idx = 0;
+
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ IsThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer >= '7' && SubVer <= '9') {
+ ARMArchVersion = V7A;
+ ARMArchFeature = ",+v7a";
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ ARMArchVersion = V7M;
+ ARMArchFeature = ",+v7m";
+ }
+ } else if (SubVer == '6') {
+ ARMArchVersion = V6;
+ ARMArchFeature = ",+v6";
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
+ ARMArchVersion = V6T2;
+ ARMArchFeature = ",+v6t2";
+ }
+ } else if (SubVer == '5') {
+ ARMArchVersion = V5T;
+ ARMArchFeature = ",+v5t";
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
+ ARMArchVersion = V5TE;
+ ARMArchFeature = ",+v5te";
+ }
+ } else if (SubVer == '4') {
+ if (Len >= Idx+2 && TT[Idx+1] == 't') {
+ ARMArchVersion = V4T;
+ ARMArchFeature = ",+v4t";
+ } else {
+ ARMArchVersion = V4;
+ ARMArchFeature = "";
+ }
+ }
+ }
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ // Parse features string. If the first entry in FS (the CPU) is missing,
+ // insert the architecture feature derived from the target triple. This is
+ // important for setting features that are implied based on the architecture
+ // version.
+ std::string FSWithArch;
+ if (FS.empty())
+ FSWithArch = std::string(ARMArchFeature);
+ else if (FS.find(',') == 0)
+ FSWithArch = std::string(ARMArchFeature) + FS;
+ else
+ FSWithArch = FS;
+ CPUString = ParseSubtargetFeatures(FSWithArch, CPUString);
+
+ // After parsing Itineraries, set ItinData.IssueWidth.
+ computeIssueWidth();
+
+ // Thumb2 implies at least V6T2.
+ if (ARMArchVersion >= V6T2)
+ ThumbMode = Thumb2;
+ else if (ThumbMode >= Thumb2)
+ ARMArchVersion = V6T2;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (!isTargetDarwin())
+ UseMovt = hasV6T2Ops();
+ else {
+ IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+ UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ }
+
+ if (!isThumb() || hasThumb2())
+ PostRAScheduler = true;
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+ AllowsUnalignedMem = true;
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+
+ if (!isTargetDarwin()) {
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ if (RelocM == Reloc::PIC_) {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+
+ return false;
+ } else {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ // If we have a reasonable estimate of the pipeline depth, then we can
+ // estimate the penalty of a misprediction based on that.
+ if (isCortexA8())
+ return 13;
+ else if (isCortexA9())
+ return 8;
+
+ // Otherwise, just return a sensible default.
+ return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+ unsigned allStage1Units = 0;
+ for (const InstrItinerary *itin = InstrItins.Itineraries;
+ itin->FirstStage != ~0U; ++itin) {
+ const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+ allStage1Units |= IS->getUnits();
+ }
+ InstrItins.IssueWidth = 0;
+ while (allStage1Units) {
+ ++InstrItins.IssueWidth;
+ // clear the lowest bit
+ allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+ }
+ assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000..76c1c3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,234 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+namespace llvm {
+class GlobalValue;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+ enum ARMArchEnum {
+ V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
+ };
+
+ enum ARMProcFamilyEnum {
+ Others, CortexA8, CortexA9
+ };
+
+ enum ARMFPEnum {
+ None, VFPv2, VFPv3, NEON
+ };
+
+ enum ThumbTypeEnum {
+ Thumb1,
+ Thumb2
+ };
+
+ /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
+ /// V6, V6T2, V7A, V7M.
+ ARMArchEnum ARMArchVersion;
+
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
+ /// ARMFPUType - Floating Point Unit type.
+ ARMFPEnum ARMFPUType;
+
+ /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
+ /// specified. Use the method useNEONForSinglePrecisionFP() to
+ /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
+
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
+ /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+ bool IsThumb;
+
+ /// ThumbMode - Indicates supported Thumb version.
+ ThumbTypeEnum ThumbMode;
+
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
+ /// IsR9Reserved - True if R9 is a not available as general purpose register.
+ bool IsR9Reserved;
+
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
+ /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
+ /// only so far)
+ bool HasFP16;
+
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+ /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
+ /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ bool AllowsUnalignedMem;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
+ // Change this once Thumb1 ldmia / stmia support is added.
+ return isThumb1Only() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ void computeIssueWidth();
+
+ bool hasV4TOps() const { return ARMArchVersion >= V4T; }
+ bool hasV5TOps() const { return ARMArchVersion >= V5T; }
+ bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
+ bool hasV6Ops() const { return ARMArchVersion >= V6; }
+ bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
+ bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
+ bool hasARMOps() const { return !NoARM; }
+
+ bool hasVFP2() const { return ARMFPUType >= VFPv2; }
+ bool hasVFP3() const { return ARMFPUType >= VFPv3; }
+ bool hasNEON() const { return ARMFPUType >= NEON; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP; }
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool hasMPExtension() const { return HasMPExtension; }
+
+ bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
+
+ bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+ bool isTargetELF() const { return !isTargetDarwin(); }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return IsThumb; }
+ bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
+ bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
+ bool hasThumb2() const { return ThumbMode >= Thumb2; }
+
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+
+ bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ unsigned getMispredictionPenalty() const;
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 0000000..0ee773b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,202 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMMCAsmInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARM.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ case Triple::Darwin:
+ return new ARMMCAsmInfoDarwin();
+ default:
+ return new ARMELFMCAsmInfo();
+ }
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ default:
+ return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+ }
+}
+
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
+ RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
+
+ // Register the target asm info.
+ RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
+ RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
+
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &FS,
+ bool isThumb)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, isThumb),
+ JITInfo(),
+ InstrItins(Subtarget.getInstrItineraryData())
+{
+ DefRelocModel = getRelocationModel();
+}
+
+ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "v128:32:128-v64:32:64-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, FS, true),
+ InstrInfo(Subtarget.hasThumb2()
+ ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
+ : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget.hasThumb2()
+ ? new ARMFrameLowering(Subtarget)
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+}
+
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+
+ return false;
+}
+
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createARMISelDag(*this, OptLevel));
+ return false;
+}
+
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass(true));
+ if (ExpandMLx &&
+ OptLevel != CodeGenOpt::None && Subtarget.hasVFP2())
+ PM.add(createMLxExpansionPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass());
+ if (Subtarget.hasNEON())
+ PM.add(createNEONMoveFixPass());
+ }
+
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createIfConverterPass());
+ }
+ if (Subtarget.isThumb2())
+ PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 0000000..e0aa149
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,143 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "ARMInstrInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
+ ARMSubtarget Subtarget;
+private:
+ ARMJITInfo JITInfo;
+ InstrItineraryData InstrItins;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+public:
+ ARMBaseTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool isThumb);
+
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &MCE);
+};
+
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+ ARMInstrInfo InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ ARMFrameLowering FrameLowering;
+ public:
+ ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+/// Thumb-1 and Thumb-2.
+///
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ OwningPtr<ARMFrameLowering> FrameLowering;
+public:
+ ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
+ virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ /// returns either Thumb1InstrInfo or Thumb2InstrInfo
+ virtual const ARMBaseInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+ /// returns either Thumb1FrameLowering or ARMFrameLowering
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
new file mode 100644
index 0000000..7535da5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -0,0 +1,46 @@
+//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF Target
+//===----------------------------------------------------------------------===//
+
+void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ }
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 0000000..c6a7261
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,38 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class MCContext;
+class TargetMachine;
+
+class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
+public:
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(NULL)
+ {}
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *getAttributesSection() const {
+ return AttributesSection;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
new file mode 100644
index 0000000..2428ce1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -0,0 +1,152 @@
+//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+class ARMBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+public:
+ ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+ ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("arm-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+ ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("thumb-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+} // end anonymous namespace
+
+AsmToken ARMBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default: break;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ break;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+ // Check for register aliases.
+ // r13 -> sp
+ // r14 -> lr
+ // r15 -> pc
+ // ip -> r12
+ // FIXME: Some assemblers support lots of others. Do we want them all?
+ if (!regID) {
+ regID = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ }
+ }
+
+ return AsmToken(lexedToken);
+}
+
+extern "C" void LLVMInitializeARMAsmLexer() {
+ RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 0000000..129af20
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,1866 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+/// Shift types used for register controlled shifts in ARM memory addressing.
+enum ShiftType {
+ Lsl,
+ Lsr,
+ Asr,
+ Ror,
+ Rrx
+};
+
+namespace {
+
+class ARMOperand;
+
+class ARMAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+ TargetMachine &TM;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ int TryParseRegister();
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
+ const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
+
+
+ bool ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ShiftType &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum,
+ SMLoc &E);
+ bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveThumb(SMLoc L);
+ bool ParseDirectiveThumbFunc(SMLoc L);
+ bool ParseDirectiveCode(SMLoc L);
+ bool ParseDirectiveSyntax(SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+ void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
+
+ /// }
+
+ OperandMatchResultTy tryParseCoprocNumOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseCoprocRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseProcIFlagsOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMSRMaskOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+
+public:
+ ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<ARMSubtarget>()));
+ }
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// instruction.
+class ARMOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ CondCode,
+ CCOut,
+ CoprocNum,
+ CoprocReg,
+ Immediate,
+ MemBarrierOpt,
+ Memory,
+ MSRMask,
+ ProcIFlags,
+ Register,
+ RegisterList,
+ DPRRegisterList,
+ SPRRegisterList,
+ Token
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ SmallVector<unsigned, 8> Registers;
+
+ union {
+ struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
+
+ struct {
+ unsigned Val;
+ } Cop;
+
+ struct {
+ ARM_PROC::IFlags Val;
+ } IFlags;
+
+ struct {
+ unsigned Val;
+ } MMask;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ /// Combined record for all forms of ARM address expressions.
+ struct {
+ unsigned BaseRegNum;
+ union {
+ unsigned RegNum; ///< Offset register num, when OffsetIsReg.
+ const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
+ } Offset;
+ const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
+ enum ShiftType ShiftType; // used when OffsetRegShifted is true
+ unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
+ unsigned Preindexed : 1;
+ unsigned Postindexed : 1;
+ unsigned OffsetIsReg : 1;
+ unsigned Negative : 1; // only used when OffsetIsReg is true
+ unsigned Writeback : 1;
+ } Mem;
+ };
+
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case CCOut:
+ case Register:
+ Reg = o.Reg;
+ break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList:
+ Registers = o.Registers;
+ break;
+ case CoprocNum:
+ case CoprocReg:
+ Cop = o.Cop;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case MSRMask:
+ MMask = o.MMask;
+ break;
+ case ProcIFlags:
+ IFlags = o.IFlags;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
+ unsigned getCoproc() const {
+ assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == Register || Kind == CCOut) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == RegisterList || Kind == DPRRegisterList ||
+ Kind == SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ /// @name Memory Operand Accessors
+ /// @{
+
+ unsigned getMemBaseRegNum() const {
+ return Mem.BaseRegNum;
+ }
+ unsigned getMemOffsetRegNum() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.RegNum;
+ }
+ const MCExpr *getMemOffset() const {
+ assert(!Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.Value;
+ }
+ unsigned getMemOffsetRegShifted() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.OffsetRegShifted;
+ }
+ const MCExpr *getMemShiftAmount() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftAmount;
+ }
+ enum ShiftType getMemShiftType() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftType;
+ }
+ bool getMemPreindexed() const { return Mem.Preindexed; }
+ bool getMemPostindexed() const { return Mem.Postindexed; }
+ bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
+ bool getMemNegative() const { return Mem.Negative; }
+ bool getMemWriteback() const { return Mem.Writeback; }
+
+ /// @}
+
+ bool isCoprocNum() const { return Kind == CoprocNum; }
+ bool isCoprocReg() const { return Kind == CoprocReg; }
+ bool isCondCode() const { return Kind == CondCode; }
+ bool isCCOut() const { return Kind == CCOut; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isReg() const { return Kind == Register; }
+ bool isRegList() const { return Kind == RegisterList; }
+ bool isDPRRegList() const { return Kind == DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == SPRRegisterList; }
+ bool isToken() const { return Kind == Token; }
+ bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
+ bool isMemory() const { return Kind == Memory; }
+ bool isMemMode5() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
+ getMemNegative())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range 0-1020.
+ int64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
+ }
+ bool isMemModeRegThumb() const {
+ if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+ return false;
+ return true;
+ }
+ bool isMemModeImmThumb() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range 0-124.
+ uint64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 124);
+ }
+ bool isMSRMask() const { return Kind == MSRMask; }
+ bool isProcIFlags() const { return Kind == ProcIFlags; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::CreateReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::CreateReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addMemMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 5 offset operand!");
+
+ // The MCInst offset operand doesn't include the low two bits (like
+ // the instruction encoding).
+ int64_t Offset = CE->getValue() / 4;
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
+ Offset)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
+ -Offset)));
+ }
+
+ void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ }
+
+ void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode offset operand!");
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ }
+
+ virtual void dump(raw_ostream &OS) const;
+
+ static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CondCode);
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *
+ CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ KindTy Kind = RegisterList;
+
+ if (ARM::DPRRegClass.contains(Regs.front().first))
+ Kind = DPRRegisterList;
+ else if (ARM::SPRRegClass.contains(Regs.front().first))
+ Kind = SPRRegisterList;
+
+ ARMOperand *Op = new ARMOperand(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->first);
+ array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
+ const MCExpr *Offset, int OffsetRegNum,
+ bool OffsetRegShifted, enum ShiftType ShiftType,
+ const MCExpr *ShiftAmount, bool Preindexed,
+ bool Postindexed, bool Negative, bool Writeback,
+ SMLoc S, SMLoc E) {
+ assert((OffsetRegNum == -1 || OffsetIsReg) &&
+ "OffsetRegNum must imply OffsetIsReg!");
+ assert((!OffsetRegShifted || OffsetIsReg) &&
+ "OffsetRegShifted must imply OffsetIsReg!");
+ assert((Offset || OffsetIsReg) &&
+ "Offset must exists unless register offset is used!");
+ assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
+ "Cannot have shift amount without shifted register offset!");
+ assert((!Offset || !OffsetIsReg) &&
+ "Cannot have expression offset and register offset!");
+
+ ARMOperand *Op = new ARMOperand(Memory);
+ Op->Mem.BaseRegNum = BaseRegNum;
+ Op->Mem.OffsetIsReg = OffsetIsReg;
+ if (OffsetIsReg)
+ Op->Mem.Offset.RegNum = OffsetRegNum;
+ else
+ Op->Mem.Offset.Value = Offset;
+ Op->Mem.OffsetRegShifted = OffsetRegShifted;
+ Op->Mem.ShiftType = ShiftType;
+ Op->Mem.ShiftAmount = ShiftAmount;
+ Op->Mem.Preindexed = Preindexed;
+ Op->Mem.Postindexed = Postindexed;
+ Op->Mem.Negative = Negative;
+ Op->Mem.Writeback = Writeback;
+
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void ARMOperand::dump(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ break;
+ case Memory:
+ OS << "<memory "
+ << "base:" << getMemBaseRegNum();
+ if (getMemOffsetIsReg()) {
+ OS << " offset:<register " << getMemOffsetRegNum();
+ if (getMemOffsetRegShifted()) {
+ OS << " offset-shift-type:" << getMemShiftType();
+ OS << " offset-shift-amount:" << *getMemShiftAmount();
+ }
+ } else {
+ OS << " offset:" << *getMemOffset();
+ }
+ if (getMemOffsetIsReg())
+ OS << " (offset-is-reg)";
+ if (getMemPreindexed())
+ OS << " (pre-indexed)";
+ if (getMemPostindexed())
+ OS << " (post-indexed)";
+ if (getMemNegative())
+ OS << " (negative)";
+ if (getMemWriteback())
+ OS << " (writeback)";
+ OS << ">";
+ break;
+ case ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
+ break;
+ }
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = TryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::TryParseRegister() {
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+ if (!RegNum) return -1;
+
+ Parser.Lex(); // Eat identifier token.
+ return RegNum;
+}
+
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created. Another AsmOperand is created
+/// if there is a "writeback". 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = TryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse an coprocessor related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ switch (Name.size()) {
+ default: break;
+ case 2:
+ if (Name[0] != CoprocOp)
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ break;
+ case 3:
+ if (Name[0] != CoprocOp || Name[1] != '1')
+ return -1;
+ switch (Name[2]) {
+ default: return -1;
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
+ }
+ break;
+ }
+
+ return -1;
+}
+
+/// tryParseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
+
+/// Parse a register list, return it if successful else return null. The first
+/// token must be a '{' when called.
+bool ARMAsmParser::
+ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+
+ // Read the rest of the registers in the list.
+ unsigned PrevRegNum = 0;
+ SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
+
+ do {
+ bool IsRange = Parser.getTok().is(AsmToken::Minus);
+ Parser.Lex(); // Eat non-identifier token.
+
+ const AsmToken &RegTok = Parser.getTok();
+ SMLoc RegLoc = RegTok.getLoc();
+ if (RegTok.isNot(AsmToken::Identifier)) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
+
+ int RegNum = TryParseRegister();
+ if (RegNum == -1) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
+
+ if (IsRange) {
+ int Reg = PrevRegNum;
+ do {
+ ++Reg;
+ Registers.push_back(std::make_pair(Reg, RegLoc));
+ } while (Reg != RegNum);
+ } else {
+ Registers.push_back(std::make_pair(RegNum, RegLoc));
+ }
+
+ PrevRegNum = RegNum;
+ } while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus));
+
+ // Process the right curly brace of the list.
+ const AsmToken &RCurlyTok = Parser.getTok();
+ if (RCurlyTok.isNot(AsmToken::RCurly)) {
+ Error(RCurlyTok.getLoc(), "'}' expected");
+ return true;
+ }
+
+ SMLoc E = RCurlyTok.getLoc();
+ Parser.Lex(); // Eat right curly brace token.
+
+ // Verify the register list.
+ SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ RI = Registers.begin(), RE = Registers.end();
+
+ unsigned HighRegNum = getARMRegisterNumbering(RI->first);
+ bool EmittedWarning = false;
+
+ DenseMap<unsigned, bool> RegMap;
+ RegMap[HighRegNum] = true;
+
+ for (++RI; RI != RE; ++RI) {
+ const std::pair<unsigned, SMLoc> &RegInfo = *RI;
+ unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+
+ if (RegMap[Reg]) {
+ Error(RegInfo.second, "register duplicated in register list");
+ return true;
+ }
+
+ if (!EmittedWarning && Reg < HighRegNum)
+ Warning(RegInfo.second,
+ "register not in ascending order in register list");
+
+ RegMap[Reg] = true;
+ HighRegNum = std::max(Reg, HighRegNum);
+ }
+
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+ return false;
+}
+
+/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef OptStr = Tok.getString();
+
+ unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("ish", ARM_MB::ISH)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef IFlagsStr = Tok.getString();
+
+ unsigned IFlags = 0;
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef Mask = Tok.getString();
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ StringRef SpecReg = Mask.slice(Start, Next);
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_c
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 0; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (FlagsVal == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // Special register without flags are equivalent to "fc" flags.
+ if (!FlagsVal)
+ FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
+
+/// Parse an ARM memory expression, return false if successful else return true
+/// or an error. The first token must be a '[' when called.
+///
+/// TODO Only preindexing and postindexing addressing are started, unindexed
+/// with option, etc are still to do.
+bool ARMAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S, E;
+ assert(Parser.getTok().is(AsmToken::LBrac) &&
+ "Token is not a Left Bracket");
+ S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = Parser.getTok();
+ if (BaseRegTok.isNot(AsmToken::Identifier)) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+ int BaseRegNum = TryParseRegister();
+ if (BaseRegNum == -1) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+
+ // The next token must either be a comma or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ return true;
+
+ bool Preindexed = false;
+ bool Postindexed = false;
+ bool OffsetIsReg = false;
+ bool Negative = false;
+ bool Writeback = false;
+ ARMOperand *WBOp = 0;
+ int OffsetRegNum = -1;
+ bool OffsetRegShifted = false;
+ enum ShiftType ShiftType = Lsl;
+ const MCExpr *ShiftAmount = 0;
+ const MCExpr *Offset = 0;
+
+ // First look for preindexed address forms, that is after the "[Rn" we now
+ // have to see if the next token is a comma.
+ if (Tok.is(AsmToken::Comma)) {
+ Preindexed = true;
+ Parser.Lex(); // Eat comma token.
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
+ Offset, OffsetIsReg, OffsetRegNum, E))
+ return true;
+ const AsmToken &RBracTok = Parser.getTok();
+ if (RBracTok.isNot(AsmToken::RBrac)) {
+ Error(RBracTok.getLoc(), "']' expected");
+ return true;
+ }
+ E = RBracTok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc());
+ Writeback = true;
+ Parser.Lex(); // Eat exclaim token
+ }
+ } else {
+ // The "[Rn" we have so far was not followed by a comma.
+
+ // If there's anything other than the right brace, this is a post indexing
+ // addressing form.
+ E = Tok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ const AsmToken &NextTok = Parser.getTok();
+
+ if (NextTok.isNot(AsmToken::EndOfStatement)) {
+ Postindexed = true;
+ Writeback = true;
+
+ if (NextTok.isNot(AsmToken::Comma)) {
+ Error(NextTok.getLoc(), "',' expected");
+ return true;
+ }
+
+ Parser.Lex(); // Eat comma token.
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
+ ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
+ E))
+ return true;
+ }
+ }
+
+ // Force Offset to exist if used.
+ if (!OffsetIsReg) {
+ if (!Offset)
+ Offset = MCConstantExpr::Create(0, getContext());
+ }
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset,
+ OffsetRegNum, OffsetRegShifted,
+ ShiftType, ShiftAmount, Preindexed,
+ Postindexed, Negative, Writeback,
+ S, E));
+ if (WBOp)
+ Operands.push_back(WBOp);
+
+ return false;
+}
+
+/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
+/// we will parse the following (were +/- means that a plus or minus is
+/// optional):
+/// +/-Rm
+/// +/-Rm, shift
+/// #offset
+/// we return false on success or an error otherwise.
+bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ShiftType &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum,
+ SMLoc &E) {
+ Negative = false;
+ OffsetRegShifted = false;
+ OffsetIsReg = false;
+ OffsetRegNum = -1;
+ const AsmToken &NextTok = Parser.getTok();
+ E = NextTok.getLoc();
+ if (NextTok.is(AsmToken::Plus))
+ Parser.Lex(); // Eat plus token.
+ else if (NextTok.is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat minus token
+ }
+ // See if there is a register following the "[Rn," or "[Rn]," we have so far.
+ const AsmToken &OffsetRegTok = Parser.getTok();
+ if (OffsetRegTok.is(AsmToken::Identifier)) {
+ SMLoc CurLoc = OffsetRegTok.getLoc();
+ OffsetRegNum = TryParseRegister();
+ if (OffsetRegNum != -1) {
+ OffsetIsReg = true;
+ E = CurLoc;
+ }
+ }
+
+ // If we parsed a register as the offset then there can be a shift after that.
+ if (OffsetRegNum != -1) {
+ // Look for a comma then a shift
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat comma token.
+
+ const AsmToken &Tok = Parser.getTok();
+ if (ParseShift(ShiftType, ShiftAmount, E))
+ return Error(Tok.getLoc(), "shift expected");
+ OffsetRegShifted = true;
+ }
+ }
+ else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
+ // Look for #offset following the "[Rn," or "[Rn],"
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+
+ Parser.Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(Offset))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ }
+ return false;
+}
+
+/// ParseShift as one of these two:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// and returns true if it parses a shift otherwise it returns false.
+bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
+ SMLoc &E) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ St = Lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ St = Lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ St = Asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ St = Ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ St = Rrx;
+ else
+ return true;
+ Parser.Lex(); // Eat shift type token.
+
+ // Rrx stands alone.
+ if (St == Rrx)
+ return false;
+
+ // Otherwise, there must be a '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(ShiftAmount))
+ return true;
+
+ return false;
+}
+
+/// Parse a arm instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
+bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+ SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Identifier:
+ if (!TryParseRegisterWithWriteBack(Operands))
+ return false;
+
+ // Fall though for the Identifier case that is not a register or a
+ // special name.
+ case AsmToken::Integer: // things like 1f and 2b as a branch targets
+ case AsmToken::Dot: { // . as a branch target
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(IdVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::LBrac:
+ return ParseMemory(Operands);
+ case AsmToken::LCurly:
+ return ParseRegisterList(Operands);
+ case AsmToken::Hash:
+ // #42 -> immediate.
+ // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ const MCExpr *ImmVal;
+ if (getParser().ParseExpression(ImmVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+ return false;
+ case AsmToken::Colon: {
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (ParsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().ParseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
+ }
+}
+
+// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// :lower16: and :upper16:.
+bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ RefKind = ARMMCExpr::VK_ARM_None;
+
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ if (IDVal == "lower16") {
+ RefKind = ARMMCExpr::VK_ARM_LO16;
+ } else if (IDVal == "upper16") {
+ RefKind = ARMMCExpr::VK_ARM_HI16;
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+ return false;
+}
+
+const MCExpr *
+ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // to the leftmost symbol.
+ if (Variant == MCSymbolRefExpr::VK_None)
+ return E;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle target expr yet");
+ case MCExpr::Constant:
+ llvm_unreachable("Can't handle lower16/upper16 of constant yet");
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None)
+ return 0;
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary:
+ llvm_unreachable("Can't handle unary expressions yet");
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = BE->getRHS();
+ if (!LHS)
+ return 0;
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+static StringRef SplitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
+ //
+ // FIXME: Would be nice to autogen this.
+ if (Mnemonic == "teq" || Mnemonic == "vceq" ||
+ Mnemonic == "movs" ||
+ Mnemonic == "svc" ||
+ (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls") ||
+ Mnemonic == "vacge" || Mnemonic == "vcge" ||
+ Mnemonic == "vclt" ||
+ Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" ||
+ (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
+ Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
+ Mnemonic == "vqdmlal"))
+ return Mnemonic;
+
+ // First, split out any predication code.
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
+ Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+ Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
+ Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+ // The "cps" instruction can have a interrupt mode operand which is glued into
+ // the mnemonic. Check if this is the case, split it and parse the imod op
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ return Mnemonic;
+}
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb();
+
+ if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+ Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mov" ||
+ Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+ Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
+ Mnemonic == "eor" || Mnemonic == "smlal" || Mnemonic == "mvn") {
+ CanAcceptCarrySet = true;
+ } else {
+ CanAcceptCarrySet = false;
+ }
+
+ if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+ Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+ Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+ Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+ Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
+ Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+ CanAcceptPredicationCode = false;
+ } else {
+ CanAcceptPredicationCode = true;
+ }
+
+ if (isThumb)
+ if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+ CanAcceptPredicationCode = false;
+}
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
+ ProcessorIMod);
+
+ Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // Add the carry setting operand, if necessary.
+ //
+ // FIXME: It would be awesome if we could somehow invent a location such that
+ // match errors on this operand would print a nice diagnostic about how the
+ // 's' character in the mnemonic resulted in a CCOut operand.
+ if (CanAcceptCarrySet) {
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ NameLoc));
+ } else {
+ // This mnemonic can't ever accept a carry set, but the user wrote one (or
+ // misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), NameLoc));
+ } else {
+ // This mnemonic can't ever accept a predication code, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ } else {
+ // This mnemonic can't ever accept a imod, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ StringRef ExtraToken = Name.slice(Start, Next);
+
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return TokError("unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ unsigned ErrorInfo;
+ MatchResultTy MatchResult, MatchResult2;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult != Match_Success) {
+ // If we get a Match_InvalidOperand it might be some arithmetic instruction
+ // that does not update the condition codes. So try adding a CCOut operand
+ // with a value of reg0.
+ if (MatchResult == Match_InvalidOperand) {
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(0,
+ ((ARMOperand*)Operands[0])->getStartLoc()));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ // If we get a Match_MnemonicFail it might be some arithmetic instruction
+ // that updates the condition codes if it ends in 's'. So see if the
+ // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
+ // operand with a value of CPSR.
+ else if(MatchResult == Match_MnemonicFail) {
+ // Get the instruction mnemonic, which is the first token.
+ StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
+ if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
+ // removed the 's' from the mnemonic for matching.
+ StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
+ SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(MnemonicNoS, NameLoc));
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(Mnemonic, NameLoc));
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ }
+ }
+ switch (MatchResult) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+/// ParseDirective parses the arm specific directives
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".thumb")
+ return ParseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_func")
+ return ParseDirectiveThumbFunc(DirectiveID.getLoc());
+ else if (IDVal == ".code")
+ return ParseDirectiveCode(DirectiveID.getLoc());
+ else if (IDVal == ".syntax")
+ return ParseDirectiveSyntax(DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveThumb
+/// ::= .thumb
+bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO: set thumb mode
+ // TODO: tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// ParseDirectiveThumbFunc
+/// ::= .thumbfunc symbol_name
+bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ StringRef Name = Tok.getString();
+ Parser.Lex(); // Consume the identifier token.
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // Mark symbol as a thumb symbol.
+ MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitThumbFunc(Func);
+ return false;
+}
+
+/// ParseDirectiveSyntax
+/// ::= .syntax unified | divided
+bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(L, "unexpected token in .syntax directive");
+ StringRef Mode = Tok.getString();
+ if (Mode == "unified" || Mode == "UNIFIED")
+ Parser.Lex();
+ else if (Mode == "divided" || Mode == "DIVIDED")
+ return Error(L, "'.syntax divided' arm asssembly not supported");
+ else
+ return Error(L, "unrecognized syntax mode in .syntax directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// ParseDirectiveCode
+/// ::= .code 16 | 32
+bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
+ int64_t Val = Parser.getTok().getIntVal();
+ if (Val == 16)
+ Parser.Lex();
+ else if (Val == 32)
+ Parser.Lex();
+ else
+ return Error(L, "invalid operand to .code directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ // FIXME: We need to be able switch subtargets at this point so that
+ // MatchInstructionImpl() will work when it gets the AvailableFeatures which
+ // includes Feature_IsThumb or not to match the right instructions. This is
+ // blocked on the FIXME in llvm-mc.cpp when creating the TargetMachine.
+ if (Val == 16){
+ assert(TM.getSubtarget<ARMSubtarget>().isThumb() &&
+ "switching between arm/thumb not yet suppported via .code 16)");
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ }
+ else{
+ assert(!TM.getSubtarget<ARMSubtarget>().isThumb() &&
+ "switching between thumb/arm not yet suppported via .code 32)");
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+
+ return false;
+}
+
+extern "C" void LLVMInitializeARMAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ LLVMInitializeARMAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
new file mode 100644
index 0000000..78d73d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -0,0 +1,548 @@
+//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code to implement the public interfaces of ARMDisassembler and
+// ThumbDisassembler, both of which are instances of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARMDisassembler.h"
+#include "ARMDisassemblerCore.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//#define DEBUG(X) do { X; } while (0)
+
+/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
+/// ARMDecoderEmitter.cpp TableGen backend. It contains:
+///
+/// o Mappings from opcode to ARM/Thumb instruction format
+///
+/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
+/// for an ARM instruction.
+///
+/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
+/// function for a Thumb instruction.
+///
+#include "ARMGenDecoderTables.inc"
+
+#include "ARMGenEDInfo.inc"
+
+using namespace llvm;
+
+/// showBitVector - Use the raw_ostream to log a diagnostic message describing
+/// the inidividual bits of the instruction.
+///
+static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
+ // Split the bit position markers into more than one lines to fit 80 columns.
+ os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11"
+ << " 10 9 8 7 6 5 4 3 2 1 0 \n";
+ os << "---------------------------------------------------------------"
+ << "----------------------------------\n";
+ os << '|';
+ for (unsigned i = 32; i != 0; --i) {
+ if (insn >> (i - 1) & 0x01)
+ os << " 1";
+ else
+ os << " 0";
+ os << (i%4 == 1 ? '|' : ':');
+ }
+ os << '\n';
+ // Split the bit position markers into more than one lines to fit 80 columns.
+ os << "---------------------------------------------------------------"
+ << "----------------------------------\n";
+ os << '\n';
+}
+
+/// decodeARMInstruction is a decorator function which tries special cases of
+/// instruction matching before calling the auto-generated decoder function.
+static unsigned decodeARMInstruction(uint32_t &insn) {
+ if (slice(insn, 31, 28) == 15)
+ goto AutoGenedDecoder;
+
+ // Special case processing, if any, goes here....
+
+ // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
+ // The insufficient encoding information of the combined instruction confuses
+ // the decoder wrt BFC/BFI. Therefore, we try to recover here.
+ // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
+ // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
+ if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
+ if (slice(insn, 3, 0) == 15)
+ return ARM::BFC;
+ else
+ return ARM::BFI;
+ }
+
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+ // A1 & A2.
+ // As a result, the decoder fails to deocode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+
+ // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
+ // As a result, the decoder fails to decode UMULL properly.
+ if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
+ return ARM::UMULL;
+ }
+
+ // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
+ // As a result, the decoder fails to decode SBFX properly.
+ if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
+ return ARM::SBFX;
+
+ // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
+ // As a result, the decoder fails to decode UBFX properly.
+ if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
+ return ARM::UBFX;
+
+ // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
+ // As a result, the decoder fails to deocode SSAT properly.
+ if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
+ return ARM::SSAT;
+
+ // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
+ // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
+ if (slice(insn, 27, 24) == 0) {
+ switch (slice(insn, 21, 20)) {
+ case 2:
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ return ARM::STRHT;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 3:
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ return ARM::LDRHT;
+ case 13:
+ return ARM::LDRSBT;
+ case 15:
+ return ARM::LDRSHT;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+ // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
+ // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
+ // properly.
+ if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
+ unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::STRH;
+ case 3: // Pre-indexed
+ return ARM::STRH_PRE;
+ case 0: // Post-indexed
+ return ARM::STRH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 13:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRD;
+ case 3: // Pre-indexed
+ return ARM::LDRD_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRD_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 15:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::STRD;
+ case 3: // Pre-indexed
+ return ARM::STRD_PRE;
+ case 0: // Post-indexed
+ return ARM::STRD_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+ // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
+ // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
+ // properly.
+ if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
+ unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRH;
+ case 3: // Pre-indexed
+ return ARM::LDRH_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 13:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRSB;
+ case 3: // Pre-indexed
+ return ARM::LDRSB_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRSB_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 15:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRSH;
+ case 3: // Pre-indexed
+ return ARM::LDRSH_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRSH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+AutoGenedDecoder:
+ // Calling the auto-generated decoder function.
+ return decodeInstruction(insn);
+}
+
+// Helper function for special case handling of LDR (literal) and friends.
+// See, for example, A6.3.7 Load word: Table A6-18 Load word.
+// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return Opcode; // Return unmorphed opcode.
+
+ case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2LDRs: case ARM::t2LDRT:
+ return ARM::t2LDRpci;
+
+ case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
+ case ARM::t2LDRBi12: case ARM::t2LDRBi8:
+ case ARM::t2LDRBs: case ARM::t2LDRBT:
+ return ARM::t2LDRBpci;
+
+ case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
+ case ARM::t2LDRHi12: case ARM::t2LDRHi8:
+ case ARM::t2LDRHs: case ARM::t2LDRHT:
+ return ARM::t2LDRHpci;
+
+ case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBs: case ARM::t2LDRSBT:
+ return ARM::t2LDRSBpci;
+
+ case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHs: case ARM::t2LDRSHT:
+ return ARM::t2LDRSHpci;
+ }
+}
+
+/// decodeThumbSideEffect is a decorator function which can potentially twiddle
+/// the instruction or morph the returned opcode under Thumb2.
+///
+/// First it checks whether the insn is a NEON or VFP instr; if true, bit
+/// twiddling could be performed on insn to turn it into an ARM NEON/VFP
+/// equivalent instruction and decodeInstruction is called with the transformed
+/// insn.
+///
+/// Next, there is special handling for Load byte/halfword/word instruction by
+/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
+/// Thumb2 instruction. See comments below for further details.
+///
+/// Finally, one last check is made to see whether the insn is a NEON/VFP and
+/// decodeInstruction(insn) is invoked on the original insn.
+///
+/// Otherwise, decodeThumbInstruction is called with the original insn.
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
+ if (IsThumb2) {
+ uint16_t op1 = slice(insn, 28, 27);
+ uint16_t op2 = slice(insn, 26, 20);
+
+ // A6.3 32-bit Thumb instruction encoding
+ // Table A6-9 32-bit Thumb instruction encoding
+
+ // The coprocessor instructions of interest are transformed to their ARM
+ // equivalents.
+
+ // --------- Transform Begin Marker ---------
+ if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
+ // A7.4 Advanced SIMD data-processing instructions
+ // U bit of Thumb corresponds to Inst{24} of ARM.
+ uint16_t U = slice(op1, 1, 1);
+
+ // Inst{28-24} of ARM = {1,0,0,1,U};
+ uint16_t bits28_24 = 9 << 1 | U;
+ DEBUG(showBitVector(errs(), insn));
+ setSlice(insn, 28, 24, bits28_24);
+ return decodeInstruction(insn);
+ }
+
+ if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
+ // A7.7 Advanced SIMD element or structure load/store instructions
+ // Inst{27-24} of Thumb = 0b1001
+ // Inst{27-24} of ARM = 0b0100
+ DEBUG(showBitVector(errs(), insn));
+ setSlice(insn, 27, 24, 4);
+ return decodeInstruction(insn);
+ }
+ // --------- Transform End Marker ---------
+
+ // See, for example, A6.3.7 Load word: Table A6-18 Load word.
+ // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
+ // before returning it to our caller.
+ if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
+ && slice(insn, 19, 16) == 15)
+ return T2Morph2LoadLiteral(decodeThumbInstruction(insn));
+
+ // One last check for NEON/VFP instructions.
+ if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
+ return decodeInstruction(insn);
+
+ // Fall through.
+ }
+
+ return decodeThumbInstruction(insn);
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool ARMDisassembler::getInstruction(MCInst &MI,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a small-endian 32-bit word in the stream.
+ insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ unsigned Opcode = decodeARMInstruction(insn);
+ ARMFormat Format = ARMFormats[Opcode];
+ Size = 4;
+
+ DEBUG({
+ errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
+ << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
+ << ")\n";
+ showBitVector(errs(), insn);
+ });
+
+ ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+ if (!Builder)
+ return false;
+
+ if (!Builder->Build(MI, insn))
+ return false;
+
+ delete Builder;
+
+ return true;
+}
+
+bool ThumbDisassembler::getInstruction(MCInst &MI,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os) const {
+ // The Thumb instruction stream is a sequence of halhwords.
+
+ // This represents the first halfword as well as the machine instruction
+ // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
+ // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
+ // the top half followed by the second halfword.
+ unsigned insn = 0;
+ // Possible second halfword.
+ uint16_t insn1 = 0;
+
+ // A6.1 Thumb instruction set encoding
+ //
+ // If bits [15:11] of the halfword being decoded take any of the following
+ // values, the halfword is the first halfword of a 32-bit instruction:
+ // o 0b11101
+ // o 0b11110
+ // o 0b11111.
+ //
+ // Otherwise, the halfword is a 16-bit instruction.
+
+ // Read 2 bytes of data first.
+ uint8_t bytes[2];
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a small-endian 16-bit halfword in the stream.
+ insn = (bytes[1] << 8) | bytes[0];
+ unsigned bits15_11 = slice(insn, 15, 11);
+ bool IsThumb2 = false;
+
+ // 32-bit instructions if the bits [15:11] of the halfword matches
+ // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }.
+ if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
+ IsThumb2 = true;
+ if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1)
+ return false;
+ // Encoded as a small-endian 16-bit halfword in the stream.
+ insn1 = (bytes[1] << 8) | bytes[0];
+ insn = (insn << 16 | insn1);
+ }
+
+ // The insn could potentially be bit-twiddled in order to be decoded as an ARM
+ // NEON/VFP opcode. In such case, the modified insn is later disassembled as
+ // an ARM NEON/VFP instruction.
+ //
+ // This is a short term solution for lack of encoding bits specified for the
+ // Thumb2 NEON/VFP instructions. The long term solution could be adding some
+ // infrastructure to have each instruction support more than one encodings.
+ // Which encoding is used would be based on which subtarget the compiler/
+ // disassembler is working with at the time. This would allow the sharing of
+ // the NEON patterns between ARM and Thumb2, as well as potential greater
+ // sharing between the regular ARM instructions and the 32-bit wide Thumb2
+ // instructions as well.
+ unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
+
+ ARMFormat Format = ARMFormats[Opcode];
+ Size = IsThumb2 ? 4 : 2;
+
+ DEBUG({
+ errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
+ << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
+ << ")\n";
+ showBitVector(errs(), insn);
+ });
+
+ ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+ if (!Builder)
+ return false;
+
+ Builder->SetSession(const_cast<Session *>(&SO));
+
+ if (!Builder->Build(MI, insn))
+ return false;
+
+ delete Builder;
+
+ return true;
+}
+
+// A8.6.50
+// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition.
+static unsigned short CountITSize(unsigned ITMask) {
+ // First count the trailing zeros of the IT mask.
+ unsigned TZ = CountTrailingZeros_32(ITMask);
+ if (TZ > 3) {
+ DEBUG(errs() << "Encoding error: IT Mask '0000'");
+ return 0;
+ }
+ return (4 - TZ);
+}
+
+/// Init ITState. Note that at least one bit is always 1 in mask.
+bool Session::InitIT(unsigned short bits7_0) {
+ ITCounter = CountITSize(slice(bits7_0, 3, 0));
+ if (ITCounter == 0)
+ return false;
+
+ // A8.6.50 IT
+ unsigned short FirstCond = slice(bits7_0, 7, 4);
+ if (FirstCond == 0xF) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1111'");
+ return false;
+ }
+ if (FirstCond == 0xE && ITCounter != 1) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'");
+ return false;
+ }
+
+ ITState = bits7_0;
+
+ return true;
+}
+
+/// Update ITState if necessary.
+void Session::UpdateIT() {
+ assert(ITCounter);
+ --ITCounter;
+ if (ITCounter == 0)
+ ITState = 0;
+ else {
+ unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1;
+ setSlice(ITState, 4, 0, NewITState4_0);
+ }
+}
+
+static MCDisassembler *createARMDisassembler(const Target &T) {
+ return new ARMDisassembler;
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T) {
+ return new ThumbDisassembler;
+}
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
new file mode 100644
index 0000000..0a74a38
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
@@ -0,0 +1,99 @@
+//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains the header for ARMDisassembler and ThumbDisassembler, both are
+// subclasses of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMDISASSEMBLER_H
+#define ARMDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler() :
+ MCDisassembler() {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+};
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+/// Session - Keep track of the IT Block progression.
+class Session {
+ friend class ARMBasicMCBuilder;
+public:
+ Session() : ITCounter(0), ITState(0) {}
+ ~Session() {}
+ /// InitIT - Initializes ITCounter/ITState.
+ bool InitIT(unsigned short bits7_0);
+ /// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
+ void UpdateIT();
+
+private:
+ unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4.
+ unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially.
+};
+
+/// ThumbDisassembler - Thumb disassembler for all ARM platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler() :
+ MCDisassembler(), SO() {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ Session SO;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
new file mode 100644
index 0000000..bac68dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -0,0 +1,3245 @@
+//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code to represent the core concepts of Builder and DisassembleFP
+// to solve the problem of disassembling an ARM instr.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARMDisassemblerCore.h"
+#include "ARMAddressingModes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+//#define DEBUG(X) do { X; } while (0)
+
+/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
+/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
+/// describing the operand info for each ARMInsts[i].
+///
+/// Together with an instruction's encoding format, we can take advantage of the
+/// NumOperands and the OpInfo fields of the target instruction description in
+/// the quest to build out the MCOperand list for an MCInst.
+///
+/// The general guideline is that with a known format, the number of dst and src
+/// operands are well-known. The dst is built first, followed by the src
+/// operand(s). The operands not yet used at this point are for the Implicit
+/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
+/// defined with two components:
+///
+/// def pred { // Operand PredicateOperand
+/// ValueType Type = OtherVT;
+/// string PrintMethod = "printPredicateOperand";
+/// string AsmOperandLowerMethod = ?;
+/// dag MIOperandInfo = (ops i32imm, CCR);
+/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
+/// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
+/// }
+///
+/// which is manifested by the TargetOperandInfo[] of:
+///
+/// { 0, 0|(1<<TOI::Predicate), 0 },
+/// { ARM::CCRRegClassID, 0|(1<<TOI::Predicate), 0 }
+///
+/// So the first predicate MCOperand corresponds to the immediate part of the
+/// ARM condition field (Inst{31-28}), and the second predicate MCOperand
+/// corresponds to a register kind of ARM::CPSR.
+///
+/// For the Defs part, in the simple case of only cc_out:$s, we have:
+///
+/// def cc_out { // Operand OptionalDefOperand
+/// ValueType Type = OtherVT;
+/// string PrintMethod = "printSBitModifierOperand";
+/// string AsmOperandLowerMethod = ?;
+/// dag MIOperandInfo = (ops CCR);
+/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
+/// dag DefaultOps = (ops (i32 zero_reg));
+/// }
+///
+/// which is manifested by the one TargetOperandInfo of:
+///
+/// { ARM::CCRRegClassID, 0|(1<<TOI::OptionalDef), 0 }
+///
+/// And this maps to one MCOperand with the regsiter kind of ARM::CPSR.
+#include "ARMGenInstrInfo.inc"
+
+using namespace llvm;
+
+const char *ARMUtils::OpcodeName(unsigned Opcode) {
+ return ARMInsts[Opcode].Name;
+}
+
+// Return the register enum Based on RegClass and the raw register number.
+// FIXME: Auto-gened?
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
+ // For this purpose, we can treat rGPR as if it were GPR.
+ if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+
+ // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
+ unsigned RegNum =
+ RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
+
+ switch (RegNum) {
+ default:
+ break;
+ case 0:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D0;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q0;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0;
+ }
+ break;
+ case 1:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D1;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q1;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1;
+ }
+ break;
+ case 2:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D2;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q2;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2;
+ }
+ break;
+ case 3:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D3;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q3;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3;
+ }
+ break;
+ case 4:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D4;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4;
+ }
+ break;
+ case 5:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D5;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5;
+ }
+ break;
+ case 6:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D6;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6;
+ }
+ break;
+ case 7:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D7;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7;
+ }
+ break;
+ case 8:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R8;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8;
+ case ARM::QPRRegClassID: return ARM::Q8;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8;
+ }
+ break;
+ case 9:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R9;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9;
+ case ARM::QPRRegClassID: return ARM::Q9;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9;
+ }
+ break;
+ case 10:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R10;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10;
+ case ARM::QPRRegClassID: return ARM::Q10;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10;
+ }
+ break;
+ case 11:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R11;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11;
+ case ARM::QPRRegClassID: return ARM::Q11;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11;
+ }
+ break;
+ case 12:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R12;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12;
+ case ARM::QPRRegClassID: return ARM::Q12;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12;
+ }
+ break;
+ case 13:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::SP;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13;
+ case ARM::QPRRegClassID: return ARM::Q13;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13;
+ }
+ break;
+ case 14:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::LR;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14;
+ case ARM::QPRRegClassID: return ARM::Q14;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14;
+ }
+ break;
+ case 15:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::PC;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15;
+ case ARM::QPRRegClassID: return ARM::Q15;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15;
+ }
+ break;
+ case 16:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D16;
+ case ARM::SPRRegClassID: return ARM::S16;
+ }
+ break;
+ case 17:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D17;
+ case ARM::SPRRegClassID: return ARM::S17;
+ }
+ break;
+ case 18:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D18;
+ case ARM::SPRRegClassID: return ARM::S18;
+ }
+ break;
+ case 19:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D19;
+ case ARM::SPRRegClassID: return ARM::S19;
+ }
+ break;
+ case 20:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D20;
+ case ARM::SPRRegClassID: return ARM::S20;
+ }
+ break;
+ case 21:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D21;
+ case ARM::SPRRegClassID: return ARM::S21;
+ }
+ break;
+ case 22:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D22;
+ case ARM::SPRRegClassID: return ARM::S22;
+ }
+ break;
+ case 23:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D23;
+ case ARM::SPRRegClassID: return ARM::S23;
+ }
+ break;
+ case 24:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D24;
+ case ARM::SPRRegClassID: return ARM::S24;
+ }
+ break;
+ case 25:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D25;
+ case ARM::SPRRegClassID: return ARM::S25;
+ }
+ break;
+ case 26:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D26;
+ case ARM::SPRRegClassID: return ARM::S26;
+ }
+ break;
+ case 27:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D27;
+ case ARM::SPRRegClassID: return ARM::S27;
+ }
+ break;
+ case 28:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D28;
+ case ARM::SPRRegClassID: return ARM::S28;
+ }
+ break;
+ case 29:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D29;
+ case ARM::SPRRegClassID: return ARM::S29;
+ }
+ break;
+ case 30:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D30;
+ case ARM::SPRRegClassID: return ARM::S30;
+ }
+ break;
+ case 31:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D31;
+ case ARM::SPRRegClassID: return ARM::S31;
+ }
+ break;
+ }
+ DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n");
+ // Encoding error. Mark the builder with error code != 0.
+ B->SetErr(-1);
+ return 0;
+}
+
+///////////////////////////////
+// //
+// Utility Functions //
+// //
+///////////////////////////////
+
+// Extract/Decode Rd: Inst{15-12}.
+static inline unsigned decodeRd(uint32_t insn) {
+ return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask;
+}
+
+// Extract/Decode Rn: Inst{19-16}.
+static inline unsigned decodeRn(uint32_t insn) {
+ return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask;
+}
+
+// Extract/Decode Rm: Inst{3-0}.
+static inline unsigned decodeRm(uint32_t insn) {
+ return (insn & ARMII::GPRRegMask);
+}
+
+// Extract/Decode Rs: Inst{11-8}.
+static inline unsigned decodeRs(uint32_t insn) {
+ return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask;
+}
+
+static inline unsigned getCondField(uint32_t insn) {
+ return (insn >> ARMII::CondShift);
+}
+
+static inline unsigned getIBit(uint32_t insn) {
+ return (insn >> ARMII::I_BitShift) & 1;
+}
+
+static inline unsigned getAM3IBit(uint32_t insn) {
+ return (insn >> ARMII::AM3_I_BitShift) & 1;
+}
+
+static inline unsigned getPBit(uint32_t insn) {
+ return (insn >> ARMII::P_BitShift) & 1;
+}
+
+static inline unsigned getUBit(uint32_t insn) {
+ return (insn >> ARMII::U_BitShift) & 1;
+}
+
+static inline unsigned getPUBits(uint32_t insn) {
+ return (insn >> ARMII::U_BitShift) & 3;
+}
+
+static inline unsigned getSBit(uint32_t insn) {
+ return (insn >> ARMII::S_BitShift) & 1;
+}
+
+static inline unsigned getWBit(uint32_t insn) {
+ return (insn >> ARMII::W_BitShift) & 1;
+}
+
+static inline unsigned getDBit(uint32_t insn) {
+ return (insn >> ARMII::D_BitShift) & 1;
+}
+
+static inline unsigned getNBit(uint32_t insn) {
+ return (insn >> ARMII::N_BitShift) & 1;
+}
+
+static inline unsigned getMBit(uint32_t insn) {
+ return (insn >> ARMII::M_BitShift) & 1;
+}
+
+// See A8.4 Shifts applied to a register.
+// A8.4.2 Register controlled shifts.
+//
+// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits
+// into llvm enums for shift opcode. The API clients should pass in the value
+// encoded with two bits, so the assert stays to signal a wrong API usage.
+//
+// A8-12: DecodeRegShift()
+static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
+ switch (bits) {
+ default: assert(0 && "No such value"); return ARM_AM::no_shift;
+ case 0: return ARM_AM::lsl;
+ case 1: return ARM_AM::lsr;
+ case 2: return ARM_AM::asr;
+ case 3: return ARM_AM::ror;
+ }
+}
+
+// See A8.4 Shifts applied to a register.
+// A8.4.1 Constant shifts.
+//
+// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5
+// encodings into the intended ShiftOpc and shift amount.
+//
+// A8-11: DecodeImmShift()
+static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
+ ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
+}
+
+// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
+// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. The API
+// clients should pass in the value encoded with two bits, so the assert stays
+// to signal a wrong API usage.
+static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
+ switch (bits) {
+ default: assert(0 && "No such value"); return ARM_AM::bad_am_submode;
+ case 1: return ARM_AM::ia; // P=0 U=1
+ case 3: return ARM_AM::ib; // P=1 U=1
+ case 0: return ARM_AM::da; // P=0 U=0
+ case 2: return ARM_AM::db; // P=1 U=0
+ }
+}
+
+////////////////////////////////////////////
+// //
+// Disassemble function definitions //
+// //
+////////////////////////////////////////////
+
+/// There is a separate Disassemble*Frm function entry for disassembly of an ARM
+/// instr into a list of MCOperands in the appropriate order, with possible dst,
+/// followed by possible src(s).
+///
+/// The processing of the predicate, and the 'S' modifier bit, if MI modifies
+/// the CPSR, is factored into ARMBasicMCBuilder's method named
+/// TryPredicateAndSBitModifier.
+
+static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ assert(0 && "Unexpected pseudo instruction!");
+ return false;
+}
+
+// Multiply Instructions.
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS:
+// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
+//
+// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT:
+// Rd{19-16} Rn{3-0} Rm{11-8}
+//
+// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT:
+// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
+//
+// The mapping of the multiply registers to the "regular" ARM registers, where
+// there are convenience decoder functions, is:
+//
+// Inst{15-12} => Rd
+// Inst{19-16} => Rn
+// Inst{3-0} => Rm
+// Inst{11-8} => Rs
+static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ unsigned short NumDefs = TID.getNumDefs();
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm");
+ assert(NumOps >= 3
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass == ARM::GPRRegClassID
+ && "Expect three register operands");
+
+ // Instructions with two destination registers have RdLo{15-12} first.
+ if (NumDefs == 2) {
+ assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
+ "Expect 4th register operand");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // The destination register: RdHi{19-16} or Rd{19-16}.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ // The two src regsiters: Rn{3-0}, then Rm{11-8}.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ OpIdx += 3;
+
+ // Many multiply instructions (e.g., MLA) have three src registers.
+ // The third register operand is Ra{15-12}.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// Helper routines for disassembly of coprocessor instructions.
+
+static bool LdStCopOpcode(unsigned Opcode) {
+ if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) ||
+ (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE))
+ return true;
+ return false;
+}
+static bool CoprocessorOpcode(unsigned Opcode) {
+ if (LdStCopOpcode(Opcode))
+ return true;
+
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::CDP: case ARM::CDP2:
+ case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2:
+ case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2:
+ return true;
+ }
+}
+static inline unsigned GetCoprocessor(uint32_t insn) {
+ return slice(insn, 11, 8);
+}
+static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) {
+ return CDP ? slice(insn, 23, 20) : slice(insn, 23, 21);
+}
+static inline unsigned GetCopOpc2(uint32_t insn) {
+ return slice(insn, 7, 5);
+}
+static inline unsigned GetCopOpc(uint32_t insn) {
+ return slice(insn, 7, 4);
+}
+// Most of the operands are in immediate forms, except Rd and Rn, which are ARM
+// core registers.
+//
+// CDP, CDP2: cop opc1 CRd CRn CRm opc2
+//
+// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2
+//
+// MCRR, MCRR2, MRRC, MRRc2: cop opc Rd Rn CRm
+//
+// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00
+// and friends
+// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00
+// and friends
+// <-- addrmode2 -->
+//
+// LDC_OPTION: cop CRd Rn imm8
+// and friends
+// STC_OPTION: cop CRd Rn imm8
+// and friends
+//
+static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr");
+
+ unsigned &OpIdx = NumOpsAdded;
+ bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
+ Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
+ // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}).
+ bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
+ bool LdStCop = LdStCopOpcode(Opcode);
+
+ OpIdx = 0;
+
+ MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn)));
+
+ if (LdStCop) {
+ // Unindex if P:W = 0b00 --> _OPTION variant
+ unsigned PW = getPBit(insn) << 1 | getWBit(insn);
+
+ MI.addOperand(MCOperand::CreateImm(decodeRd(insn)));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ if (PW) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
+ ARM_AM::no_shift);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ OpIdx = 5;
+ } else {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
+ OpIdx = 4;
+ }
+ } else {
+ MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
+ : GetCopOpc1(insn, NoGPR)));
+
+ MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
+ : MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn)))
+ : MCOperand::CreateImm(decodeRn(insn)));
+
+ MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
+
+ OpIdx = 5;
+
+ if (!OneCopOpc) {
+ MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
+ ++OpIdx;
+ }
+ }
+
+ return true;
+}
+
+// Branch Instructions.
+// BLr9: SignExtend(Imm24:'00', 32)
+// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
+// SMC: ZeroExtend(imm4, 32)
+// SVC: ZeroExtend(Imm24, 32)
+//
+// Various coprocessor instructions are assigned BrFrm arbitrarily.
+// Delegates to DisassembleCoprocessor() helper function.
+//
+// MRS/MRSsys: Rd
+// MSR/MSRsys: Rm mask=Inst{19-16}
+// BXJ: Rm
+// MSRi/MSRsysi: so_imm
+// SRSW/SRS: ldstm_mode:$amode mode_imm
+// RFEW/RFE: ldstm_mode:$amode Rn
+static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (CoprocessorOpcode(Opcode))
+ return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ // MRS and MRSsys take one GPR reg Rd.
+ if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // BXJ takes one GPR reg Rm.
+ if (Opcode == ARM::BXJ) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // MSR take a mask, followed by one GPR reg Rm. The mask contains the R Bit in
+ // bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSR) {
+ assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 2;
+ return true;
+ }
+ // MSRi take a mask, followed by one so_imm operand. The mask contains the
+ // R Bit in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSRi) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
+ // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
+ // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
+ // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
+ unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
+ unsigned Imm = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
+ Opcode == ARM::RFEW || Opcode == ARM::RFE) {
+ ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
+
+ if (Opcode == ARM::SRSW || Opcode == ARM::SRS)
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ else
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 3;
+ return true;
+ }
+
+ assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred
+ || Opcode == ARM::SMC || Opcode == ARM::SVC) &&
+ "Unexpected Opcode");
+
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected");
+
+ int Imm32 = 0;
+ if (Opcode == ARM::SMC) {
+ // ZeroExtend(imm4, 32) where imm24 = Inst{3-0}.
+ Imm32 = slice(insn, 3, 0);
+ } else if (Opcode == ARM::SVC) {
+ // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}.
+ Imm32 = slice(insn, 23, 0);
+ } else {
+ // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}.
+ unsigned Imm26 = slice(insn, 23, 0) << 2;
+ //Imm32 = signextend<signed int, 26>(Imm26);
+ Imm32 = SignExtend32<26>(Imm26);
+
+ // When executing an ARM instruction, PC reads as the address of the current
+ // instruction plus 8. The assembler subtracts 8 from the difference
+ // between the branch instruction and the target address, disassembler has
+ // to add 8 to compensate.
+ Imm32 += 8;
+ }
+
+ MI.addOperand(MCOperand::CreateImm(Imm32));
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// Misc. Branch Instructions.
+// BLXr9, BXr9
+// BX, BX_RET
+static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // BX_RET and MOVPCLR have only two predicate operands; do an early return.
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
+ return true;
+
+ // BLXr9 and BX take one GPR reg.
+ if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) {
+ assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ OpIdx = 1;
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
+ uint32_t lsb = slice(insn, 11, 7);
+ uint32_t msb = slice(insn, 20, 16);
+ uint32_t Val = 0;
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+
+ for (uint32_t i = lsb; i <= msb; ++i)
+ Val |= (1 << i);
+ mask = ~Val;
+ return true;
+}
+
+// A major complication is the fact that some of the saturating add/subtract
+// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
+// They are QADD, QDADD, QDSUB, and QSUB.
+static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ unsigned short NumDefs = TID.getNumDefs();
+ bool isUnary = isUnaryDP(TID.TSFlags);
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Disassemble register def if there is one.
+ if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // Now disassemble the src operands.
+ if (OpIdx >= NumOps)
+ return false;
+
+ // Special-case handling of BFC/BFI/SBFX/UBFX.
+ if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+ }
+ uint32_t mask = 0;
+ if (!getBFCInvMask(insn, mask))
+ return false;
+
+ MI.addOperand(MCOperand::CreateImm(mask));
+ OpIdx += 2;
+ return true;
+ }
+ if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1));
+ OpIdx += 3;
+ return true;
+ }
+
+ bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD ||
+ Opcode == ARM::QDSUB || Opcode == ARM::QSUB);
+
+ // BinaryDP has an Rn operand.
+ if (!isUnary) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ RmRn ? decodeRm(insn) : decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
+ if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Now disassemble operand 2.
+ if (OpIdx >= NumOps)
+ return false;
+
+ if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ // We have a reg/reg form.
+ // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
+ // routed here as well.
+ // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ RmRn? decodeRn(insn) : decodeRm(insn))));
+ ++OpIdx;
+ } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
+ // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
+ assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
+ unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
+ MI.addOperand(MCOperand::CreateImm(Imm16));
+ ++OpIdx;
+ } else {
+ // We have a reg/imm form.
+ // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
+ // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
+ // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
+ assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
+ unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
+ unsigned Imm = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ unsigned short NumDefs = TID.getNumDefs();
+ bool isUnary = isUnaryDP(TID.TSFlags);
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Disassemble register def if there is one.
+ if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the src operands.
+ if (OpIdx >= NumOps)
+ return false;
+
+ // BinaryDP has an Rn operand.
+ if (!isUnary) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
+ if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Disassemble operand 2, which consists of three components.
+ if (OpIdx + 2 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+2].RegClass < 0) &&
+ "Expect 3 reg operands");
+
+ // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
+ unsigned Rs = slice(insn, 4, 4);
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ if (Rs) {
+ // Register-controlled shifts: [Rm, Rs, shift].
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0)));
+ } else {
+ // Constant shifts: [Rm, reg0, shift_imm].
+ MI.addOperand(MCOperand::CreateReg(0)); // NoRegister
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm)));
+ }
+ OpIdx += 3;
+
+ return true;
+}
+
+static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(TID.TSFlags);
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(((!isStore && TID.getNumDefs() > 0) ||
+ (isStore && (TID.getNumDefs() == 0 || isPrePost)))
+ && "Invalid arguments");
+
+ // Operand 0 of a pre- and post-indexed store is the address base writeback.
+ if (isPrePost && isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the dst/src operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // After dst of a pre- and post-indexed load is the address base writeback.
+ if (isPrePost && !isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the base operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
+ && "Index mode or tied_to operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ // For reg/reg form, base reg is followed by +/- reg shop imm.
+ // For immediate form, it is followed by +/- imm12.
+ // See also ARMAddressingModes.h (Addressing Mode #2).
+ if (OpIdx + 1 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ if (getIBit(insn) == 0) {
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Disassemble the 12-bit immediate offset.
+ unsigned Imm12 = slice(insn, 11, 0);
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the offset reg (Rm), shift type, and immediate shift length.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
+ }
+ OpIdx += 2;
+
+ return true;
+}
+
+static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B);
+}
+
+static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
+}
+
+static bool HasDualReg(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
+ case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
+ return true;
+ }
+}
+
+static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(TID.TSFlags);
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(((!isStore && TID.getNumDefs() > 0) ||
+ (isStore && (TID.getNumDefs() == 0 || isPrePost)))
+ && "Invalid arguments");
+
+ // Operand 0 of a pre- and post-indexed store is the address base writeback.
+ if (isPrePost && isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ bool DualReg = HasDualReg(Opcode);
+
+ // Disassemble the dst/src operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Fill in LDRD and STRD's second operand.
+ if (DualReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn) + 1)));
+ ++OpIdx;
+ }
+
+ // After dst of a pre- and post-indexed load is the address base writeback.
+ if (isPrePost && !isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the base operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
+ && "Index mode or tied_to operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ // For reg/reg form, base reg is followed by +/- reg.
+ // For immediate form, it is followed by +/- imm8.
+ // See also ARMAddressingModes.h (Addressing Mode #3).
+ if (OpIdx + 1 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ if (getAM3IBit(insn) == 1) {
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Disassemble the 8-bit immediate offset.
+ unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
+ unsigned Imm4L = insn & 0xF;
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the offset reg (Rm).
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ OpIdx += 2;
+
+ return true;
+}
+
+static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
+}
+
+static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
+}
+
+// The algorithm for disassembly of LdStMulFrm is different from others because
+// it explicitly populates the two predicate operands after operand 0 (the base)
+// and operand 1 (the AM4 mode imm). After operand 3, we need to populate the
+// reglist with each affected register encoded as an MCOperand.
+static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5");
+ NumOpsAdded = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base, if necessary.
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
+ Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
+ Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+ }
+
+ // Add the base register operand.
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Handling the two predicate operands before the reglist.
+ int64_t CondVal = insn >> ARMII::CondShift;
+ MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+
+ NumOpsAdded += 3;
+
+ // Fill the variadic part of reglist.
+ unsigned RegListBits = insn & ((1 << 16) - 1);
+ for (unsigned i = 0; i < 16; ++i) {
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+ }
+
+ return true;
+}
+
+// LDREX, LDREXB, LDREXH: Rd Rn
+// LDREXD: Rd Rd+1 Rn
+// STREX, STREXB, STREXH: Rd Rm Rn
+// STREXD: Rd Rm Rm+1 Rn
+//
+// SWP, SWPB: Rd Rm Rn
+static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool isStore = slice(insn, 20, 20) == 0;
+ bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD);
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Store register Exclusive needs a source operand.
+ if (isStore) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn)+1)));
+ ++OpIdx;
+ }
+ } else if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn)+1)));
+ ++OpIdx;
+ }
+
+ // Finally add the pointer operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// Misc. Arithmetic Instructions.
+// CLZ: Rd Rm
+// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5
+// RBIT, REV, REV16, REVSH: Rd Rm
+static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ assert(NumOps >= 4 && "Expect >= 4 operands");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ // If there is still an operand info left which is an immediate operand, add
+ // an additional imm5 LSL/ASR operand.
+ if (ThreeReg && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Extract the 5-bit immediate field Inst{11-7}.
+ unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+ if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::lsl;
+ else if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
+// Extend instructions.
+// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
+// The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the
+// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
+static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ // If there is still an operand info left which is an immediate operand, add
+ // an additional rotate immediate operand.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Extract the 2-bit rotate field Inst{11-10}.
+ unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
+ // Rotation by 8, 16, or 24 bits.
+ MI.addOperand(MCOperand::CreateImm(rot << 3));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+/////////////////////////////////////
+// //
+// Utility Functions For VFP //
+// //
+/////////////////////////////////////
+
+// Extract/Decode Dd/Sd:
+//
+// SP => d = UInt(Vd:D)
+// DP => d = UInt(D:Vd)
+static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
+ : (decodeRd(insn) | getDBit(insn) << 4);
+}
+
+// Extract/Decode Dn/Sn:
+//
+// SP => n = UInt(Vn:N)
+// DP => n = UInt(N:Vn)
+static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
+ : (decodeRn(insn) | getNBit(insn) << 4);
+}
+
+// Extract/Decode Dm/Sm:
+//
+// SP => m = UInt(Vm:M)
+// DP => m = UInt(M:Vm)
+static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn))
+ : (decodeRm(insn) | getMBit(insn) << 4);
+}
+
+// A7.5.1
+static APInt VFPExpandImm(unsigned char byte, unsigned N) {
+ assert(N == 32 || N == 64);
+
+ uint64_t Result;
+ unsigned bit6 = slice(byte, 6, 6);
+ if (N == 32) {
+ Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19;
+ if (bit6)
+ Result |= 0x1f << 25;
+ else
+ Result |= 0x1 << 30;
+ } else {
+ Result = (uint64_t)slice(byte, 7, 7) << 63 |
+ (uint64_t)slice(byte, 5, 0) << 48;
+ if (bit6)
+ Result |= 0xffULL << 54;
+ else
+ Result |= 0x1ULL << 62;
+ }
+ return APInt(N, Result);
+}
+
+// VFP Unary Format Instructions:
+//
+// VCMP[E]ZD, VCMP[E]ZS: compares one floating-point register with zero
+// VCVTDS, VCVTSD: converts between double-precision and single-precision
+// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
+static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ bool isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
+ ++OpIdx;
+
+ // Early return for compare with zero instructions.
+ if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS
+ || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS)
+ return true;
+
+ RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
+ ++OpIdx;
+
+ return true;
+}
+
+// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs.
+// Some of them have operand constraints which tie the first operand in the
+// InOperandList to that of the dst. As far as asm printing is concerned, this
+// tied_to operand is simply skipped.
+static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ bool isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
+ ++OpIdx;
+
+ // Skip tied_to operand constraint.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ assert(NumOps >= 4 && "Expect >=4 operands");
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP))));
+ ++OpIdx;
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A8.6.295 vcvt (floating-point <-> integer)
+// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
+// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
+//
+// A8.6.297 vcvt (floating-point and fixed-point)
+// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
+static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
+ bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
+ bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
+ unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ if (fixed_point) {
+ // A8.6.297
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+ int size = slice(insn, 7, 7) == 0 ? 16 : 32;
+ int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClassID,
+ decodeVFPRd(insn, SP))));
+
+ assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+ "Tied to operand expected");
+ MI.addOperand(MI.getOperand(0));
+
+ assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef() && "Imm operand expected");
+ MI.addOperand(MCOperand::CreateImm(fbits));
+
+ NumOpsAdded = 3;
+ } else {
+ // A8.6.295
+ // The Rd (destination) and Rm (source) bits have different interpretations
+ // depending on their single-precisonness.
+ unsigned d, m;
+ if (slice(insn, 18, 18) == 1) { // to_integer operation
+ d = decodeVFPRd(insn, true /* Is Single Precision */);
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::SPRRegClassID, d)));
+ m = decodeVFPRm(insn, SP);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
+ } else {
+ d = decodeVFPRd(insn, SP);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
+ m = decodeVFPRm(insn, true /* Is Single Precision */);
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::SPRRegClassID, m)));
+ }
+ NumOpsAdded = 2;
+ }
+
+ return true;
+}
+
+// VMOVRS - A8.6.330
+// Rt => Rd; Sn => UInt(Vn:N)
+static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ decodeVFPRn(insn, true))));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// VMOVRRD - A8.6.332
+// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
+//
+// VMOVRRS - A8.6.331
+// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
+static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx = 2;
+
+ if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
+ unsigned Sm = decodeVFPRm(insn, true);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm+1)));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeVFPRm(insn, false))));
+ ++OpIdx;
+ }
+ return true;
+}
+
+// VMOVSR - A8.6.330
+// Rt => Rd; Sn => UInt(Vn:N)
+static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ decodeVFPRn(insn, true))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// VMOVDRR - A8.6.332
+// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
+//
+// VMOVRRS - A8.6.331
+// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
+static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
+ unsigned Sm = decodeVFPRm(insn, true);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm+1)));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeVFPRm(insn, false))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx += 2;
+ return true;
+}
+
+// VFP Load/Store Instructions.
+// VLDRD, VLDRS, VSTRD, VSTRS
+static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
+
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
+ unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ // Extract Dd/Sd for operand 0.
+ unsigned RegD = decodeVFPRd(insn, isSPVFP);
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD)));
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Next comes the AM5 Opcode.
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned char Imm8 = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8)));
+
+ NumOpsAdded = 3;
+
+ return true;
+}
+
+// VFP Load/Store Multiple Instructions.
+// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
+// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
+// followed by a reglist of either DPR(s) or SPR(s).
+//
+// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
+static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base, if necessary.
+ if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
+ Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
+ Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Next comes the AM4 Opcode.
+ ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
+ // Must be either "ia" or "db" submode.
+ if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
+ DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
+ return false;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
+
+ // Handling the two predicate operands before the reglist.
+ int64_t CondVal = insn >> ARMII::CondShift;
+ MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+
+ OpIdx += 4;
+
+ bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB ||
+ Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB ||
+ Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
+ unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ // Extract Dd/Sd.
+ unsigned RegD = decodeVFPRd(insn, isSPVFP);
+
+ // Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
+ unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
+ for (unsigned i = 0; i < Regs; ++i) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
+ RegD + i)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// Misc. VFP Instructions.
+// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand)
+// FCONSTD (DPR and a VFPf64Imm operand)
+// FCONSTS (SPR and a VFPf32Imm operand)
+// VMRS/VMSR (GPR operand)
+static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ if (Opcode == ARM::FMSTAT)
+ return true;
+
+ assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands");
+
+ unsigned RegEnum = 0;
+ switch (OpInfo[0].RegClass) {
+ case ARM::DPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false));
+ break;
+ case ARM::SPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true));
+ break;
+ case ARM::GPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn));
+ break;
+ default:
+ assert(0 && "Invalid reg class id");
+ return false;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(RegEnum));
+ ++OpIdx;
+
+ // Extract/decode the f64/f32 immediate.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // The asm syntax specifies the floating point value, not the 8-bit literal.
+ APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
+ Opcode == ARM::FCONSTD ? 64 : 32);
+ APFloat immFP = APFloat(immRaw, true);
+ double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
+ immFP.convertToFloat();
+ MI.addOperand(MCOperand::CreateFPImm(imm));
+
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file.
+#include "ThumbDisassemblerCore.h"
+
+/////////////////////////////////////////////////////
+// //
+// Utility Functions For ARM Advanced SIMD //
+// //
+/////////////////////////////////////////////////////
+
+// The following NEON namings are based on A8.6.266 VABA, VABAL. Notice that
+// A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of VABA/VABAL.
+
+// A7.3 Register encoding
+
+// Extract/Decode NEON D/Vd:
+//
+// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for
+// doubleword, Dd = UInt(D:Vd). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// D = Inst{22}, Vd = Inst{15-12}
+static unsigned decodeNEONRd(uint32_t insn) {
+ return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
+}
+
+// Extract/Decode NEON N/Vn:
+//
+// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for
+// doubleword, Dn = UInt(N:Vn). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// N = Inst{7}, Vn = Inst{19-16}
+static unsigned decodeNEONRn(uint32_t insn) {
+ return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
+}
+
+// Extract/Decode NEON M/Vm:
+//
+// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for
+// doubleword, Dm = UInt(M:Vm). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// M = Inst{5}, Vm = Inst{3-0}
+static unsigned decodeNEONRm(uint32_t insn) {
+ return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
+}
+
+namespace {
+enum ElemSize {
+ ESizeNA = 0,
+ ESize8 = 8,
+ ESize16 = 16,
+ ESize32 = 32,
+ ESize64 = 64
+};
+} // End of unnamed namespace
+
+// size field -> Inst{11-10}
+// index_align field -> Inst{7-4}
+//
+// The Lane Index interpretation depends on the Data Size:
+// 8 (encoded as size = 0b00) -> Index = index_align[3:1]
+// 16 (encoded as size = 0b01) -> Index = index_align[3:2]
+// 32 (encoded as size = 0b10) -> Index = index_align[3]
+//
+// Ref: A8.6.317 VLD4 (single 4-element structure to one lane).
+static unsigned decodeLaneIndex(uint32_t insn) {
+ unsigned size = insn >> 10 & 3;
+ assert((size == 0 || size == 1 || size == 2) &&
+ "Encoding error: size should be either 0, 1, or 2");
+
+ unsigned index_align = insn >> 4 & 0xF;
+ return (index_align >> 1) >> size;
+}
+
+// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4)
+// op = Inst{5}, cmode = Inst{11-8}
+// i = Inst{24} (ARM architecture)
+// imm3 = Inst{18-16}, imm4 = Inst{3-0}
+// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
+static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
+ unsigned char op = (insn >> 5) & 1;
+ unsigned char cmode = (insn >> 8) & 0xF;
+ unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
+ ((insn >> 16) & 7) << 4 |
+ (insn & 0xF);
+ return (op << 12) | (cmode << 8) | Imm8;
+}
+
+// A8.6.339 VMUL, VMULL (by scalar)
+// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7
+// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15
+static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize16:
+ return insn & 7;
+ case ESize32:
+ return insn & 0xF;
+ default:
+ assert(0 && "Unreachable code!");
+ return 0;
+ }
+}
+
+// A8.6.339 VMUL, VMULL (by scalar)
+// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7
+// ESize32 => index = Inst{5} (M) D0-D15
+static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize16:
+ return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1);
+ case ESize32:
+ return (insn >> 5) & 1;
+ default:
+ assert(0 && "Unreachable code!");
+ return 0;
+ }
+}
+
+// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD)
+// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}.
+static unsigned decodeVCVTFractionBits(uint32_t insn) {
+ return 64 - ((insn >> 16) & 0x3F);
+}
+
+// A8.6.302 VDUP (scalar)
+// ESize8 => index = Inst{19-17}
+// ESize16 => index = Inst{19-18}
+// ESize32 => index = Inst{19}
+static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize8:
+ return (insn >> 17) & 7;
+ case ESize16:
+ return (insn >> 18) & 3;
+ case ESize32:
+ return (insn >> 19) & 1;
+ default:
+ assert(0 && "Unspecified element size!");
+ return 0;
+ }
+}
+
+// A8.6.328 VMOV (ARM core register to scalar)
+// A8.6.329 VMOV (scalar to ARM core register)
+// ESize8 => index = Inst{21:6-5}
+// ESize16 => index = Inst{21:6}
+// ESize32 => index = Inst{21}
+static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize8:
+ return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3);
+ case ESize16:
+ return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1);
+ case ESize32:
+ return ((insn >> 21) & 1);
+ default:
+ assert(0 && "Unspecified element size!");
+ return 0;
+ }
+}
+
+// Imm6 = Inst{21-16}, L = Inst{7}
+//
+// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI):
+// case L:imm6 of
+// '0001xxx' => esize = 8; shift_amount = imm6 - 8
+// '001xxxx' => esize = 16; shift_amount = imm6 - 16
+// '01xxxxx' => esize = 32; shift_amount = imm6 - 32
+// '1xxxxxx' => esize = 64; shift_amount = imm6
+//
+// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN):
+// case L:imm6 of
+// '0001xxx' => esize = 8; shift_amount = 16 - imm6
+// '001xxxx' => esize = 16; shift_amount = 32 - imm6
+// '01xxxxx' => esize = 32; shift_amount = 64 - imm6
+// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6
+//
+static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) {
+ ElemSize esize = ESizeNA;
+ unsigned L = (insn >> 7) & 1;
+ unsigned imm6 = (insn >> 16) & 0x3F;
+ if (L == 0) {
+ if (imm6 >> 3 == 1)
+ esize = ESize8;
+ else if (imm6 >> 4 == 1)
+ esize = ESize16;
+ else if (imm6 >> 5 == 1)
+ esize = ESize32;
+ else
+ assert(0 && "Wrong encoding of Inst{7:21-16}!");
+ } else
+ esize = ESize64;
+
+ if (LeftShift)
+ return esize == ESize64 ? imm6 : (imm6 - esize);
+ else
+ return esize == ESize64 ? (esize - imm6) : (2*esize - imm6);
+}
+
+// A8.6.305 VEXT
+// Imm4 = Inst{11-8}
+static unsigned decodeN3VImm(uint32_t insn) {
+ return (insn >> 8) & 0xF;
+}
+
+// VLD*
+// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
+// VLD*LN*
+// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx)
+// VST*
+// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ...
+// VST*LN*
+// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)]
+//
+// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
+static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
+ BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+ // At least one DPR register plus addressing mode #6.
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // We have homogeneous NEON registers for Load/Store.
+ unsigned RegClass = 0;
+
+ // Double-spaced registers have increments of 2.
+ unsigned Inc = DblSpaced ? 2 : 1;
+
+ unsigned Rn = decodeRn(insn);
+ unsigned Rm = decodeRm(insn);
+ unsigned Rd = decodeNEONRd(insn);
+
+ // A7.7.1 Advanced SIMD addressing mode.
+ bool WB = Rm != 15;
+
+ // LLVM Addressing Mode #6.
+ unsigned RmEnum = 0;
+ if (WB && Rm != 13)
+ RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
+
+ if (Store) {
+ // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
+ // then possible lane index.
+ assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ ++OpIdx;
+ }
+
+ assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+ OpIdx += 2;
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(RmEnum));
+ ++OpIdx;
+ }
+
+ assert(OpIdx < NumOps &&
+ (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+ OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+ "Reg operand expected");
+
+ RegClass = OpInfo[OpIdx].RegClass;
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd)));
+ Rd += Inc;
+ ++OpIdx;
+ }
+
+ // Handle possible lane index.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+ ++OpIdx;
+ }
+
+ } else {
+ // Consume the DPR/QPR's, possible WB, AddrMode6, possible incrment reg,
+ // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
+ RegClass = OpInfo[0].RegClass;
+
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd)));
+ Rd += Inc;
+ ++OpIdx;
+ }
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ ++OpIdx;
+ }
+
+ assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+ OpIdx += 2;
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(RmEnum));
+ ++OpIdx;
+ }
+
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 &&
+ "Tied to operand expected");
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Handle possible lane index.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+ ++OpIdx;
+ }
+ }
+
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
+ return true;
+}
+
+// A7.7
+// If L (Inst{21}) == 0, store instructions.
+// Find out about double-spaced-ness of the Opcode and pass it on to
+// DisassembleNLdSt0().
+static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const StringRef Name = ARMInsts[Opcode].Name;
+ bool DblSpaced = false;
+
+ if (Name.find("LN") != std::string::npos) {
+ // To one lane instructions.
+ // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane).
+
+ // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
+ if (Name.endswith("16") || Name.endswith("16_UPD"))
+ DblSpaced = slice(insn, 5, 5) == 1;
+
+ // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
+ if (Name.endswith("32") || Name.endswith("32_UPD"))
+ DblSpaced = slice(insn, 6, 6) == 1;
+
+ } else {
+ // Multiple n-element structures with type encoded as Inst{11-8}.
+ // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
+
+ // n == 2 && type == 0b1001 -> DblSpaced = true
+ if (Name.startswith("VST2") || Name.startswith("VLD2"))
+ DblSpaced = slice(insn, 11, 8) == 9;
+
+ // n == 3 && type == 0b0101 -> DblSpaced = true
+ if (Name.startswith("VST3") || Name.startswith("VLD3"))
+ DblSpaced = slice(insn, 11, 8) == 5;
+
+ // n == 4 && type == 0b0001 -> DblSpaced = true
+ if (Name.startswith("VST4") || Name.startswith("VLD4"))
+ DblSpaced = slice(insn, 11, 8) == 1;
+
+ }
+ return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
+ slice(insn, 21, 21) == 0, DblSpaced, B);
+}
+
+// VMOV (immediate)
+// Qd/Dd imm
+static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+ decodeNEONRd(insn))));
+
+ ElemSize esize = ESizeNA;
+ switch (Opcode) {
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv16i8:
+ esize = ESize8;
+ break;
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
+ esize = ESize16;
+ break;
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
+ esize = ESize32;
+ break;
+ case ARM::VMOVv1i64:
+ case ARM::VMOVv2i64:
+ esize = ESize64;
+ break;
+ default:
+ assert(0 && "Unreachable code!");
+ return false;
+ }
+
+ // One register and a modified immediate value.
+ // Add the imm operand.
+ MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
+
+ NumOpsAdded = 2;
+ return true;
+}
+
+namespace {
+enum N2VFlag {
+ N2V_None,
+ N2V_VectorDupLane,
+ N2V_VectorConvert_Between_Float_Fixed
+};
+} // End of unnamed namespace
+
+// Vector Convert [between floating-point and fixed-point]
+// Qd/Dd Qm/Dm [fbits]
+//
+// Vector Duplicate Lane (from scalar to all elements) Instructions.
+// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q:
+// Qd/Dd Dm index
+//
+// Vector Move Long:
+// Qd Dm
+//
+// Vector Move Narrow:
+// Dd Qm
+//
+// Others
+static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opc];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 2 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ ElemSize esize = ESizeNA;
+ if (Flag == N2V_VectorDupLane) {
+ // VDUPLN has its index embedded. Its size can be inferred from the Opcode.
+ assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q &&
+ "Unexpected Opcode");
+ esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8
+ : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16
+ : ESize32);
+ }
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // VPADAL...
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ // VZIP and others have two TIED_TO reg operands.
+ int Idx;
+ while (OpIdx < NumOps &&
+ (Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ // Add TIED_TO operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Add the imm operand, if required.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+
+ unsigned imm = 0xFFFFFFFF;
+
+ if (Flag == N2V_VectorDupLane)
+ imm = decodeNVLaneDupIndex(insn, esize);
+ if (Flag == N2V_VectorConvert_Between_Float_Fixed)
+ imm = decodeVCVTFractionBits(insn);
+
+ assert(imm != 0xFFFFFFFF && "Internal error");
+ MI.addOperand(MCOperand::CreateImm(imm));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_None, B);
+}
+static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_VectorConvert_Between_Float_Fixed, B);
+}
+static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_VectorDupLane, B);
+}
+
+// Vector Shift [Accumulate] Instructions.
+// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt
+//
+// Vector Shift Left Long (with maximum shift count) Instructions.
+// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size)
+//
+static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+ assert(NumOps >= 3 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 3 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+ OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+ "Reg operand expected");
+
+ // Qm/Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
+
+ // Add the imm operand.
+
+ // VSHLL has maximum shift count as the imm, inferred from its size.
+ unsigned Imm;
+ switch (Opcode) {
+ default:
+ Imm = decodeNVSAmt(insn, LeftShift);
+ break;
+ case ARM::VSHLLi8:
+ Imm = 8;
+ break;
+ case ARM::VSHLLi16:
+ Imm = 16;
+ break;
+ case ARM::VSHLLi32:
+ Imm = 32;
+ break;
+ }
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+
+ return true;
+}
+
+// Left shift instructions.
+static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true,
+ B);
+}
+// Right shift instructions have different shift amount interpretation.
+static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
+}
+
+namespace {
+enum N3VFlag {
+ N3V_None,
+ N3V_VectorExtract,
+ N3V_VectorShift,
+ N3V_Multiply_By_Scalar
+};
+} // End of unnamed namespace
+
+// NEON Three Register Instructions with Optional Immediate Operand
+//
+// Vector Extract Instructions.
+// Qd/Dd Qn/Dn Qm/Dm imm4
+//
+// Vector Shift (Register) Instructions.
+// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n)
+//
+// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions.
+// Qd/Dd Qn/Dn RestrictedDm index
+//
+// Others
+static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+ // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
+ assert(NumOps >= 3 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 3 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ bool VdVnVm = Flag == N3V_VectorShift ? false : true;
+ bool IsImm4 = Flag == N3V_VectorExtract ? true : false;
+ bool IsDmRestricted = Flag == N3V_Multiply_By_Scalar ? true : false;
+ ElemSize esize = ESizeNA;
+ if (Flag == N3V_Multiply_By_Scalar) {
+ unsigned size = (insn >> 20) & 3;
+ if (size == 1) esize = ESize16;
+ if (size == 2) esize = ESize32;
+ assert (esize == ESize16 || esize == ESize32);
+ }
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // VABA, VABAL, VBSLd, VBSLq, ...
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Dn = Inst{7:19-16} => NEON Rn
+ // or
+ // Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ VdVnVm ? decodeNEONRn(insn)
+ : decodeNEONRm(insn))));
+ ++OpIdx;
+
+ // Special case handling for VMOVDneon and VMOVQ because they are marked as
+ // N3RegFrm.
+ if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ)
+ return true;
+
+ // Dm = Inst{5:3-0} => NEON Rm
+ // or
+ // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
+ // or
+ // Dn = Inst{7:19-16} => NEON Rn
+ unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize)
+ : decodeNEONRm(insn))
+ : decodeNEONRn(insn);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Add the imm operand.
+ unsigned Imm = 0;
+ if (IsImm4)
+ Imm = decodeN3VImm(insn);
+ else if (IsDmRestricted)
+ Imm = decodeRestrictedDmIndex(insn, esize);
+ else {
+ assert(0 && "Internal error: unreachable code!");
+ return false;
+ }
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_None, B);
+}
+static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_VectorShift, B);
+}
+static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_VectorExtract, B);
+}
+static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_Multiply_By_Scalar, B);
+}
+
+// Vector Table Lookup
+//
+// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm
+// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm
+// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm
+// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm
+static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::DPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ OpInfo[2].RegClass == ARM::DPRRegClassID &&
+ "Expect >= 3 operands and first 3 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned Rn = decodeNEONRn(insn);
+
+ // {Dn} encoded as len = 0b00
+ // {Dn Dn+1} encoded as len = 0b01
+ // {Dn Dn+1 Dn+2 } encoded as len = 0b10
+ // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11
+ unsigned Len = slice(insn, 9, 8) + 1;
+
+ // Dd (the destination vector)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // Process tied_to operand constraint.
+ int Idx;
+ if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Do the <list> now.
+ for (unsigned i = 0; i < Len; ++i) {
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ Rn + i)));
+ ++OpIdx;
+ }
+
+ // Dm (the index vector)
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+ "Reg operand (index vector) expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// Vector Get Lane (move scalar to ARM core register) Instructions.
+// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
+static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ OpInfo[2].RegClass < 0 &&
+ "Expect >= 3 operands with one dst operand");
+
+ ElemSize esize =
+ Opcode == ARM::VGETLNi32 ? ESize32
+ : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
+ : ESize32);
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ // Dn = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRn(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+ NumOpsAdded = 3;
+ return true;
+}
+
+// Vector Set Lane (move ARM core register to scalar) Instructions.
+// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
+static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::DPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+ OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[3].RegClass < 0 &&
+ "Expect >= 3 operands with one dst operand");
+
+ ElemSize esize =
+ Opcode == ARM::VSETLNi8 ? ESize8
+ : (Opcode == ARM::VSETLNi16 ? ESize16
+ : ESize32);
+
+ // Dd = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRn(insn))));
+
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+ NumOpsAdded = 4;
+ return true;
+}
+
+// Vector Duplicate Instructions (from ARM core register to all elements).
+// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
+static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 2 operands and first 2 as reg operand");
+
+ unsigned RegClass = OpInfo[0].RegClass;
+
+ // Qd/Dd = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass,
+ decodeNEONRn(insn))));
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ NumOpsAdded = 2;
+ return true;
+}
+
+// A8.6.41 DMB
+// A8.6.42 DSB
+// A8.6.49 ISB
+static inline bool MemBarrierInstr(uint32_t insn) {
+ unsigned op7_4 = slice(insn, 7, 4);
+ if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
+ return true;
+
+ return false;
+}
+
+static inline bool PreLoadOpcode(unsigned Opcode) {
+ switch(Opcode) {
+ case ARM::PLDi12: case ARM::PLDrs:
+ case ARM::PLDWi12: case ARM::PLDWrs:
+ case ARM::PLIi12: case ARM::PLIrs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // PLDi, PLDWi, PLIi: addrmode_imm12
+ // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
+ || Opcode == ARM::PLIi12) {
+ unsigned Imm12 = slice(insn, 11, 0);
+ bool Negative = getUBit(insn) == 0;
+ // -0 is represented specially. All other values are as normal.
+ if (Imm12 == 0 && Negative)
+ Imm12 = INT32_MIN;
+ MI.addOperand(MCOperand::CreateImm(Imm12));
+ NumOpsAdded = 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
+ NumOpsAdded = 3;
+ }
+
+ return true;
+}
+
+static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (MemBarrierInstr(insn)) {
+ // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
+ // of within the generic ARMBasicMCBuilder::BuildIt() method.
+ //
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ switch (Opcode) {
+ case ARM::CLREX:
+ case ARM::NOP:
+ case ARM::TRAP:
+ case ARM::YIELD:
+ case ARM::WFE:
+ case ARM::WFI:
+ case ARM::SEV:
+ return true;
+ default:
+ break;
+ }
+
+ if (Opcode == ARM::SETEND) {
+ NumOpsAdded = 1;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
+ return true;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::CPS3p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::CPS2p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::CPS1p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // DBG has its option specified in Inst{3-0}.
+ if (Opcode == ARM::DBG) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}).
+ if (Opcode == ARM::BKPT) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 |
+ slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ if (PreLoadOpcode(Opcode))
+ return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ assert(0 && "Unexpected misc instruction!");
+ return false;
+}
+
+/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
+/// We divide the disassembly task into different categories, with each one
+/// corresponding to a specific instruction encoding format. There could be
+/// exceptions when handling a specific format, and that is why the Opcode is
+/// also present in the function prototype.
+static const DisassembleFP FuncPtrs[] = {
+ &DisassemblePseudo,
+ &DisassembleMulFrm,
+ &DisassembleBrFrm,
+ &DisassembleBrMiscFrm,
+ &DisassembleDPFrm,
+ &DisassembleDPSoRegFrm,
+ &DisassembleLdFrm,
+ &DisassembleStFrm,
+ &DisassembleLdMiscFrm,
+ &DisassembleStMiscFrm,
+ &DisassembleLdStMulFrm,
+ &DisassembleLdStExFrm,
+ &DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
+ &DisassembleExtFrm,
+ &DisassembleVFPUnaryFrm,
+ &DisassembleVFPBinaryFrm,
+ &DisassembleVFPConv1Frm,
+ &DisassembleVFPConv2Frm,
+ &DisassembleVFPConv3Frm,
+ &DisassembleVFPConv4Frm,
+ &DisassembleVFPConv5Frm,
+ &DisassembleVFPLdStFrm,
+ &DisassembleVFPLdStMulFrm,
+ &DisassembleVFPMiscFrm,
+ &DisassembleThumbFrm,
+ &DisassembleMiscFrm,
+ &DisassembleNGetLnFrm,
+ &DisassembleNSetLnFrm,
+ &DisassembleNDupFrm,
+
+ // VLD and VST (including one lane) Instructions.
+ &DisassembleNLdSt,
+
+ // A7.4.6 One register and a modified immediate value
+ // 1-Register Instructions with imm.
+ // LLVM only defines VMOVv instructions.
+ &DisassembleN1RegModImmFrm,
+
+ // 2-Register Instructions with no imm.
+ &DisassembleN2RegFrm,
+
+ // 2-Register Instructions with imm (vector convert float/fixed point).
+ &DisassembleNVCVTFrm,
+
+ // 2-Register Instructions with imm (vector dup lane).
+ &DisassembleNVecDupLnFrm,
+
+ // Vector Shift Left Instructions.
+ &DisassembleN2RegVecShLFrm,
+
+ // Vector Shift Righ Instructions, which has different interpretation of the
+ // shift amount from the imm6 field.
+ &DisassembleN2RegVecShRFrm,
+
+ // 3-Register Data-Processing Instructions.
+ &DisassembleN3RegFrm,
+
+ // Vector Shift (Register) Instructions.
+ // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand)
+ &DisassembleN3RegVecShFrm,
+
+ // Vector Extract Instructions.
+ &DisassembleNVecExtractFrm,
+
+ // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
+ // By Scalar Instructions.
+ &DisassembleNVecMulScalarFrm,
+
+ // Vector Table Lookup uses byte indexes in a control vector to look up byte
+ // values in a table and generate a new vector.
+ &DisassembleNVTBLFrm,
+
+ NULL
+};
+
+/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+/// The general idea is to set the Opcode for the MCInst, followed by adding
+/// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
+/// to the Format-specific disassemble function for disassembly, followed by
+/// TryPredicateAndSBitModifier() to do PredicateOperand and OptionalDefOperand
+/// which follow the Dst/Src Operands.
+bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
+ // Stage 1 sets the Opcode.
+ MI.setOpcode(Opcode);
+ // If the number of operands is zero, we're done!
+ if (NumOps == 0)
+ return true;
+
+ // Stage 2 calls the format-specific disassemble function to build the operand
+ // list.
+ if (Disasm == NULL)
+ return false;
+ unsigned NumOpsAdded = 0;
+ bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
+
+ if (!OK || this->Err != 0) return false;
+ if (NumOpsAdded >= NumOps)
+ return true;
+
+ // Stage 3 deals with operands unaccounted for after stage 2 is finished.
+ // FIXME: Should this be done selectively?
+ return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
+}
+
+// A8.3 Conditional execution
+// A8.3.1 Pseudocode details of conditional execution
+// Condition bits '111x' indicate the instruction is always executed.
+static uint32_t CondCode(uint32_t CondField) {
+ if (CondField == 0xF)
+ return ARMCC::AL;
+ return CondField;
+}
+
+/// DoPredicateOperands - DoPredicateOperands process the predicate operands
+/// of some Thumb instructions which come before the reglist operands. It
+/// returns true if the two predicate operands have been processed.
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t /* insn */, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned Idx = MI.getNumOperands();
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of TargetOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return true;
+ }
+
+ return false;
+}
+
+/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+/// the possible Predicate and SBitModifier, to build the remaining MCOperand
+/// constituents.
+bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const std::string &Name = ARMInsts[Opcode].Name;
+ unsigned Idx = MI.getNumOperands();
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of TargetOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else {
+ if (Name.length() > 1 && Name[0] == 't') {
+ // Thumb conditional branch instructions have their cond field embedded,
+ // like ARM.
+ //
+ // A8.6.16 B
+ if (Name == "t2Bcc")
+ MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22))));
+ else if (Name == "tBcc")
+ MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8))));
+ else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ } else {
+ // ARM instructions get their condition field from Inst{31-28}.
+ MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
+ }
+ }
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ Idx += 2;
+ NumOpsRemaining -= 2;
+ }
+
+ if (NumOpsRemaining == 0)
+ return true;
+
+ // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set.
+ if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0));
+ --NumOpsRemaining;
+ }
+
+ if (NumOpsRemaining == 0)
+ return true;
+ else
+ return false;
+}
+
+/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
+/// after BuildIt is finished.
+bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
+ uint32_t insn) {
+
+ if (!SP) return Status;
+
+ if (Opcode == ARM::t2IT)
+ Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false;
+ else if (InITBlock())
+ SP->UpdateIT();
+
+ return Status;
+}
+
+/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
+ unsigned short num)
+ : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
+ unsigned Idx = (unsigned)format;
+ assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
+ Disasm = FuncPtrs[Idx];
+}
+
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing up of the allocated memory. Cacheing can be
+/// performed by the API clients to improve performance.
+ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
+ // For "Unknown format", fail by returning a NULL pointer.
+ if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
+ DEBUG(errs() << "Unknown format\n");
+ return 0;
+ }
+
+ return new ARMBasicMCBuilder(Opcode, Format,
+ ARMInsts[Opcode].getNumOperands());
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
new file mode 100644
index 0000000..9c30d33
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -0,0 +1,262 @@
+//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+//
+// The first part defines the enumeration type of ARM instruction format, which
+// specifies the encoding used by the instruction, as well as a helper function
+// to convert the enums to printable char strings.
+//
+// It also contains code to represent the concepts of Builder and DisassembleFP
+// to solve the problem of disassembling an ARM instr.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMDISASSEMBLERCORE_H
+#define ARMDISASSEMBLERCORE_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMDisassembler.h"
+
+namespace llvm {
+
+class ARMUtils {
+public:
+ static const char *OpcodeName(unsigned Opcode);
+};
+
+/////////////////////////////////////////////////////
+// //
+// Enums and Utilities for ARM Instruction Format //
+// //
+/////////////////////////////////////////////////////
+
+#define ARM_FORMATS \
+ ENTRY(ARM_FORMAT_PSEUDO, 0) \
+ ENTRY(ARM_FORMAT_MULFRM, 1) \
+ ENTRY(ARM_FORMAT_BRFRM, 2) \
+ ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
+ ENTRY(ARM_FORMAT_DPFRM, 4) \
+ ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \
+ ENTRY(ARM_FORMAT_LDFRM, 6) \
+ ENTRY(ARM_FORMAT_STFRM, 7) \
+ ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
+ ENTRY(ARM_FORMAT_STMISCFRM, 9) \
+ ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
+ ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
+ ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
+
+// ARM instruction format specifies the encoding used by the instruction.
+#define ENTRY(n, v) n = v,
+typedef enum {
+ ARM_FORMATS
+ ARM_FORMAT_NA
+} ARMFormat;
+#undef ENTRY
+
+// Converts enum to const char*.
+static const inline char *stringForARMFormat(ARMFormat form) {
+#define ENTRY(n, v) case n: return #n;
+ switch(form) {
+ ARM_FORMATS
+ case ARM_FORMAT_NA:
+ default:
+ return "";
+ }
+#undef ENTRY
+}
+
+/// Expands on the enum definitions from ARMBaseInstrInfo.h.
+/// They are being used by the disassembler implementation.
+namespace ARMII {
+ enum {
+ NEONRegMask = 15,
+ GPRRegMask = 15,
+ NEON_RegRdShift = 12,
+ NEON_D_BitShift = 22,
+ NEON_RegRnShift = 16,
+ NEON_N_BitShift = 7,
+ NEON_RegRmShift = 0,
+ NEON_M_BitShift = 5
+ };
+}
+
+/// Utility function for extracting [From, To] bits from a uint32_t.
+static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
+ assert(From < 32 && To < 32 && From >= To);
+ return (Bits >> To) & ((1 << (From - To + 1)) - 1);
+}
+
+/// Utility function for setting [From, To] bits to Val for a uint32_t.
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
+ assert(From < 32 && To < 32 && From >= To);
+ uint32_t Mask = ((1 << (From - To + 1)) - 1);
+ Bits &= ~(Mask << To);
+ Bits |= (Val & Mask) << To;
+}
+
+/// Various utilities for checking the target specific flags.
+
+/// A unary data processing instruction doesn't have an Rn operand.
+static inline bool isUnaryDP(uint64_t TSFlags) {
+ return (TSFlags & ARMII::UnaryDP);
+}
+
+/// This four-bit field describes the addressing mode used.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getAddrMode(uint64_t TSFlags) {
+ return (TSFlags & ARMII::AddrModeMask);
+}
+
+/// {IndexModePre, IndexModePost}
+/// Only valid for load and store ops.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getIndexMode(uint64_t TSFlags) {
+ return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+}
+
+/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
+ return (TSFlags & ARMII::IndexModeMask) != 0;
+}
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+// Builder Object is mostly ignored except in some Thumb disassemble functions.
+typedef ARMBasicMCBuilder *BO;
+
+/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
+/// and builds the MCOperand list upon disassembly. It returns false on failure
+/// or true on success. The number of operands added is updated upon success.
+typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
+
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing up of the allocated memory. Cacheing can be
+/// performed by the API clients to improve performance.
+extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+
+/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
+/// knows how to build up the MCOperand list.
+class ARMBasicMCBuilder {
+ friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+ unsigned Opcode;
+ ARMFormat Format;
+ unsigned short NumOps;
+ DisassembleFP Disasm;
+ Session *SP;
+ int Err; // !=0 if the builder encounters some error condition during build.
+
+private:
+ /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
+
+public:
+ ARMBasicMCBuilder(ARMBasicMCBuilder &B)
+ : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
+ SP(B.SP) {
+ Err = 0;
+ }
+
+ virtual ~ARMBasicMCBuilder() {}
+
+ void SetSession(Session *sp) {
+ SP = sp;
+ }
+
+ void SetErr(int ErrCode) {
+ Err = ErrCode;
+ }
+
+ /// DoPredicateOperands - DoPredicateOperands process the predicate operands
+ /// of some Thumb instructions which come before the reglist operands. It
+ /// returns true if the two predicate operands have been processed.
+ bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaning);
+
+ /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+ /// the possible Predicate and SBitModifier, to build the remaining MCOperand
+ /// constituents.
+ bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaning);
+
+ /// InITBlock - InITBlock returns true if we are inside an IT block.
+ bool InITBlock() {
+ if (SP)
+ return SP->ITCounter > 0;
+
+ return false;
+ }
+
+ /// Build - Build delegates to BuildIt to perform the heavy liftling. After
+ /// that, it invokes RunBuildAfterHook where some housekeepings can be done.
+ virtual bool Build(MCInst &MI, uint32_t insn) {
+ bool Status = BuildIt(MI, insn);
+ return RunBuildAfterHook(Status, MI, insn);
+ }
+
+ /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+ /// The general idea is to set the Opcode for the MCInst, followed by adding
+ /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
+ /// to the Format-specific disassemble function for disassembly, followed by
+ /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
+ /// which follow the Dst/Src Operands.
+ virtual bool BuildIt(MCInst &MI, uint32_t insn);
+
+ /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
+ /// after BuildIt is finished.
+ virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);
+
+private:
+ /// Get condition of the current IT instruction.
+ unsigned GetITCond() {
+ assert(SP);
+ return slice(SP->ITState, 7, 4);
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
new file mode 100644
index 0000000..23372e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -0,0 +1,2239 @@
+//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code for disassembling a Thumb instr. It is to be included by
+// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm()
+// function which acts as the dispatcher to disassemble a Thumb instruction.
+//
+//===----------------------------------------------------------------------===//
+
+///////////////////////////////
+// //
+// Utility Functions //
+// //
+///////////////////////////////
+
+// Utilities for 16-bit Thumb instructions.
+/*
+15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ [ tRt ]
+ [ tRm ] [ tRn ] [ tRd ]
+ D [ Rm ] [ Rd ]
+
+ [ imm3]
+ [ imm5 ]
+ i [ imm5 ]
+ [ imm7 ]
+ [ imm8 ]
+ [ imm11 ]
+
+ [ cond ]
+*/
+
+// Extract tRt: Inst{10-8}.
+static inline unsigned getT1tRt(uint32_t insn) {
+ return slice(insn, 10, 8);
+}
+
+// Extract tRm: Inst{8-6}.
+static inline unsigned getT1tRm(uint32_t insn) {
+ return slice(insn, 8, 6);
+}
+
+// Extract tRn: Inst{5-3}.
+static inline unsigned getT1tRn(uint32_t insn) {
+ return slice(insn, 5, 3);
+}
+
+// Extract tRd: Inst{2-0}.
+static inline unsigned getT1tRd(uint32_t insn) {
+ return slice(insn, 2, 0);
+}
+
+// Extract [D:Rd]: Inst{7:2-0}.
+static inline unsigned getT1Rd(uint32_t insn) {
+ return slice(insn, 7, 7) << 3 | slice(insn, 2, 0);
+}
+
+// Extract Rm: Inst{6-3}.
+static inline unsigned getT1Rm(uint32_t insn) {
+ return slice(insn, 6, 3);
+}
+
+// Extract imm3: Inst{8-6}.
+static inline unsigned getT1Imm3(uint32_t insn) {
+ return slice(insn, 8, 6);
+}
+
+// Extract imm5: Inst{10-6}.
+static inline unsigned getT1Imm5(uint32_t insn) {
+ return slice(insn, 10, 6);
+}
+
+// Extract i:imm5: Inst{9:7-3}.
+static inline unsigned getT1Imm6(uint32_t insn) {
+ return slice(insn, 9, 9) << 5 | slice(insn, 7, 3);
+}
+
+// Extract imm7: Inst{6-0}.
+static inline unsigned getT1Imm7(uint32_t insn) {
+ return slice(insn, 6, 0);
+}
+
+// Extract imm8: Inst{7-0}.
+static inline unsigned getT1Imm8(uint32_t insn) {
+ return slice(insn, 7, 0);
+}
+
+// Extract imm11: Inst{10-0}.
+static inline unsigned getT1Imm11(uint32_t insn) {
+ return slice(insn, 10, 0);
+}
+
+// Extract cond: Inst{11-8}.
+static inline unsigned getT1Cond(uint32_t insn) {
+ return slice(insn, 11, 8);
+}
+
+static inline bool IsGPR(unsigned RegClass) {
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
+}
+
+// Utilities for 32-bit Thumb instructions.
+
+// Extract imm4: Inst{19-16}.
+static inline unsigned getImm4(uint32_t insn) {
+ return slice(insn, 19, 16);
+}
+
+// Extract imm3: Inst{14-12}.
+static inline unsigned getImm3(uint32_t insn) {
+ return slice(insn, 14, 12);
+}
+
+// Extract imm8: Inst{7-0}.
+static inline unsigned getImm8(uint32_t insn) {
+ return slice(insn, 7, 0);
+}
+
+// A8.6.61 LDRB (immediate, Thumb) and friends
+// +/-: Inst{9}
+// imm8: Inst{7-0}
+static inline int decodeImm8(uint32_t insn) {
+ int Offset = getImm8(insn);
+ return slice(insn, 9, 9) ? Offset : -Offset;
+}
+
+// Extract imm12: Inst{11-0}.
+static inline unsigned getImm12(uint32_t insn) {
+ return slice(insn, 11, 0);
+}
+
+// A8.6.63 LDRB (literal) and friends
+// +/-: Inst{23}
+// imm12: Inst{11-0}
+static inline int decodeImm12(uint32_t insn) {
+ int Offset = getImm12(insn);
+ return slice(insn, 23, 23) ? Offset : -Offset;
+}
+
+// Extract imm2: Inst{7-6}.
+static inline unsigned getImm2(uint32_t insn) {
+ return slice(insn, 7, 6);
+}
+
+// For BFI, BFC, t2SBFX, and t2UBFX.
+// Extract lsb: Inst{14-12:7-6}.
+static inline unsigned getLsb(uint32_t insn) {
+ return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// For BFI and BFC.
+// Extract msb: Inst{4-0}.
+static inline unsigned getMsb(uint32_t insn) {
+ return slice(insn, 4, 0);
+}
+
+// For t2SBFX and t2UBFX.
+// Extract widthminus1: Inst{4-0}.
+static inline unsigned getWidthMinus1(uint32_t insn) {
+ return slice(insn, 4, 0);
+}
+
+// For t2ADDri12 and t2SUBri12.
+// imm12 = i:imm3:imm8;
+static inline unsigned getIImm3Imm8(uint32_t insn) {
+ return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn);
+}
+
+// For t2MOVi16 and t2MOVTi16.
+// imm16 = imm4:i:imm3:imm8;
+static inline unsigned getImm16(uint32_t insn) {
+ return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 |
+ getImm3(insn) << 8 | getImm8(insn);
+}
+
+// Inst{5-4} encodes the shift type.
+static inline unsigned getShiftTypeBits(uint32_t insn) {
+ return slice(insn, 5, 4);
+}
+
+// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount.
+static inline unsigned getShiftAmtBits(uint32_t insn) {
+ return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// A8.6.17 BFC
+// Encoding T1 ARMv6T2, ARMv7
+// LLVM-specific encoding for #<lsb> and #<width>
+static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
+ uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
+ uint32_t msb = getMsb(insn);
+ uint32_t Val = 0;
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+ for (uint32_t i = lsb; i <= msb; ++i)
+ Val |= (1 << i);
+ mask = ~Val;
+ return true;
+}
+
+// A8.4 Shifts applied to a register
+// A8.4.1 Constant shifts
+// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
+//
+// decodeImmShift() returns the shift amount and the the shift opcode.
+// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
+static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
+ ARM_AM::ShiftOpc &ShOp) {
+
+ assert(imm5 < 32 && "Invalid imm5 argument");
+ switch (bits2) {
+ default: assert(0 && "No such value");
+ case 0:
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
+ return imm5;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ return (imm5 == 0 ? 32 : imm5);
+ case 2:
+ ShOp = ARM_AM::asr;
+ return (imm5 == 0 ? 32 : imm5);
+ case 3:
+ ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
+ return (imm5 == 0 ? 1 : imm5);
+ }
+}
+
+// A6.3.2 Modified immediate constants in Thumb instructions
+//
+// ThumbExpandImm() returns the modified immediate constant given an imm12 for
+// Thumb data-processing instructions with modified immediate.
+// See also A6.3.1 Data-processing (modified immediate).
+static inline unsigned ThumbExpandImm(unsigned imm12) {
+ assert(imm12 <= 0xFFF && "Invalid imm12 argument");
+
+ // If the leading two bits is 0b00, the modified immediate constant is
+ // obtained by splatting the low 8 bits into the first byte, every other byte,
+ // or every byte of a 32-bit value.
+ //
+ // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
+ // performed.
+
+ if (slice(imm12, 11, 10) == 0) {
+ unsigned short control = slice(imm12, 9, 8);
+ unsigned imm8 = slice(imm12, 7, 0);
+ switch (control) {
+ default:
+ assert(0 && "No such value");
+ return 0;
+ case 0:
+ return imm8;
+ case 1:
+ return imm8 << 16 | imm8;
+ case 2:
+ return imm8 << 24 | imm8 << 8;
+ case 3:
+ return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
+ }
+ } else {
+ // A rotate is required.
+ unsigned Val = 1 << 7 | slice(imm12, 6, 0);
+ unsigned Amt = slice(imm12, 11, 7);
+ return ARM_AM::rotr32(Val, Amt);
+ }
+}
+
+static inline int decodeImm32_B_EncodingT3(uint32_t insn) {
+ bool S = slice(insn, 26, 26);
+ bool J1 = slice(insn, 13, 13);
+ bool J2 = slice(insn, 11, 11);
+ unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm21 |= 1 << 20;
+ if (J2) Imm21 |= 1 << 19;
+ if (J1) Imm21 |= 1 << 18;
+
+ return SignExtend32<21>(Imm21);
+}
+
+static inline int decodeImm32_B_EncodingT4(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+static inline int decodeImm32_BL(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+static inline int decodeImm32_BLX(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+// See, for example, A8.6.221 SXTAB16.
+static inline unsigned decodeRotate(uint32_t insn) {
+ unsigned rotate = slice(insn, 5, 4);
+ return rotate << 3;
+}
+
+///////////////////////////////////////////////
+// //
+// Thumb1 instruction disassembly functions. //
+// //
+///////////////////////////////////////////////
+
+// See "Utilities for 16-bit Thumb instructions" for register naming convention.
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare
+//
+// shift immediate: tRd CPSR tRn imm5
+// add/sub register: tRd CPSR tRn tRm
+// add/sub 3-bit immediate: tRd CPSR tRn imm3
+// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8
+// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov)
+//
+// Special case:
+// tMOVSr: tRd tRn
+static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID
+ && "Invalid arguments");
+
+ bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3);
+
+ // Use Rt implies use imm8.
+ bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 ||
+ Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8);
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::tGPRRegClassID,
+ UseRt ? getT1tRt(insn) : getT1tRd(insn))));
+ ++OpIdx;
+
+ // Check whether the next operand to be added is a CCR Register.
+ if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
+ assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
+ ++OpIdx;
+ }
+
+ // Check whether the next operand to be added is a Thumb1 Register.
+ assert(OpIdx < NumOps && "More operands expected");
+ if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // For UseRt, the reg operand is tied to the first reg operand.
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::tGPRRegClassID,
+ UseRt ? getT1tRt(insn) : getT1tRn(insn))));
+ ++OpIdx;
+ }
+
+ // Special case for tMOVSr.
+ if (OpIdx == NumOps)
+ return true;
+
+ // The next available operand is either a reg operand or an imm operand.
+ if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // Three register operand instructions.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
+ } else {
+ assert(OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+ MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn)
+ : (Imm3 ? getT1Imm3(insn)
+ : getT1Imm5(insn))));
+ }
+ ++OpIdx;
+
+ return true;
+}
+
+// A6.2.2 Data-processing
+//
+// tCMPr, tTST, tCMN: tRd tRn
+// tMVN, tRSB: tRd CPSR tRn
+// Others: tRd CPSR tRd(TIED_TO) tRn
+static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass == ARM::CCRRegClassID
+ || OpInfo[1].RegClass == ARM::tGPRRegClassID)
+ && "Invalid arguments");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+ ++OpIdx;
+
+ // Check whether the next operand to be added is a CCR Register.
+ if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
+ assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
+ ++OpIdx;
+ }
+
+ // We have either { tRd(TIED_TO), tRn } or { tRn } remaining.
+ // Process the TIED_TO operand first.
+
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ int Idx;
+ if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Process possible next reg operand.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // Add tRn operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.2.3 Special data instructions and branch and exchange
+//
+// tADDhirr: Rd Rd(TIED_TO) Rm
+// tCMPhir: Rd Rm
+// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn
+// tBX_RET: 0 operand
+// tBX_RET_vararg: Rm
+// tBLXr_r9: Rm
+static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // tBX_RET has 0 operand.
+ if (NumOps == 0)
+ return true;
+
+ // BX/BLX has 1 reg operand: Rm.
+ if (NumOps == 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ getT1Rm(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Add the destination operand.
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass,
+ IsGPR(RegClass) ? getT1Rd(insn)
+ : getT1tRd(insn))));
+ ++OpIdx;
+
+ // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining.
+ // Process the TIED_TO operand first.
+
+ assert(OpIdx < NumOps && "More operands expected");
+ int Idx;
+ if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // The next reg operand is either Rm or tRn.
+ assert(OpIdx < NumOps && "More operands expected");
+ RegClass = OpInfo[OpIdx].RegClass;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass,
+ IsGPR(RegClass) ? getT1Rm(insn)
+ : getT1tRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A8.6.59 LDR (literal)
+//
+// tLDRpci: tRt imm8*4
+static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 &&
+ !OpInfo[1].isPredicate() &&
+ !OpInfo[1].isOptionalDef())
+ && "Invalid arguments");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+
+ // And the (imm8 << 2) operand.
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+// Thumb specific addressing modes (see ARMInstrThumb.td):
+//
+// t_addrmode_rr := reg + reg
+//
+// t_addrmode_s4 := reg + reg
+// reg + imm5 * 4
+//
+// t_addrmode_s2 := reg + reg
+// reg + imm5 * 2
+//
+// t_addrmode_s1 := reg + reg
+// reg + imm5
+//
+// t_addrmode_sp := sp + imm8 * 4
+//
+
+// A8.6.63 LDRB (literal)
+// A8.6.79 LDRSB (literal)
+// A8.6.75 LDRH (literal)
+// A8.6.83 LDRSH (literal)
+// A8.6.59 LDR (literal)
+//
+// These instrs calculate an address from the PC value and an immediate offset.
+// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
+static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass < 0 &&
+ "Expect >= 2 operands, first as reg, and second as imm operand");
+
+ // Build the register operand, followed by the (+/-)imm12 immediate.
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+
+// A6.2.4 Load/store single data item
+//
+// Load/Store Register (reg|imm): tRd tRn imm5 tRm
+// Load Register Signed Byte|Halfword: tRd tRn tRm
+static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Table A6-5 16-bit Thumb Load/store instructions
+ // opA = 0b0101 for STR/LDR (register) and friends.
+ // Otherwise, we have STR/LDR (immediate) and friends.
+ bool Imm5 = (opA != 5);
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::tGPRRegClassID
+ && OpInfo[1].RegClass == ARM::tGPRRegClassID
+ && "Expect >= 2 operands and first two as thumb reg operands");
+
+ // Add the destination reg and the base reg.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ OpIdx = 2;
+
+ // We have either { imm5, tRm } or { tRm } remaining.
+ // Process the imm5 first. Note that STR/LDR (register) should skip the imm5
+ // offset operand for t_addrmode_s[1|2|4].
+
+ assert(OpIdx < NumOps && "More operands expected");
+
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
+ !OpInfo[OpIdx].isOptionalDef()) {
+
+ MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0));
+ ++OpIdx;
+ }
+
+ // The next reg operand is tRm, the offset.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(
+ Imm5 ? 0
+ : getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A6.2.4 Load/store single data item
+//
+// Load/Store Register SP relative: tRt ARM::SP imm8
+static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
+ && "Unexpected opcode");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ (OpInfo[2].RegClass < 0 &&
+ !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 3;
+ return true;
+}
+
+// Table A6-1 16-bit Thumb instruction encoding
+// A8.6.10 ADR
+//
+// tADDrPCi: tRt imm8
+static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 &&
+ !OpInfo[1].isPredicate() &&
+ !OpInfo[1].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// Table A6-1 16-bit Thumb instruction encoding
+// A8.6.8 ADD (SP plus immediate)
+//
+// tADDrSPi: tRt ARM::SP imm8
+static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ (OpInfo[2].RegClass < 0 &&
+ !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 3;
+ return true;
+}
+
+// tPUSH, tPOP: Pred-Imm Pred-CCR register_list
+//
+// where register_list = low registers + [lr] for PUSH or
+// low registers + [pc] for POP
+//
+// "low registers" is specified by Inst{7-0}
+// lr|pc is specified by Inst{8}
+static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ OpIdx += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15)
+ | slice(insn, 7, 0);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 16; ++i) {
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++OpIdx;
+ }
+ }
+
+ return true;
+}
+
+// A6.2.5 Miscellaneous 16-bit instructions
+// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP.
+//
+// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7
+// t2IT: firstcond=Inst{7-4} mask=Inst{3-0}
+// tCBNZ, tCBZ: tRd imm6*2
+// tBKPT: imm8
+// tNOP, tSEV, tYIELD, tWFE, tWFI:
+// no operand (except predicate pair)
+// tSETENDBE, tSETENDLE, :
+// no operand
+// Others: tRd tRn
+static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (NumOps == 0)
+ return true;
+
+ if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
+ return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ // Predicate operands are handled elsewhere.
+ if (NumOps == 2 &&
+ OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
+ OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
+ return true;
+ }
+
+ if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) {
+ // Special case handling for tADDspi and tSUBspi.
+ // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate)
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn)));
+ NumOpsAdded = 3;
+ return true;
+ }
+
+ if (Opcode == ARM::t2IT) {
+ // Special case handling for If-Then.
+ // A8.6.50 IT
+ // Tag the (firstcond[0] bit << 4) along with mask.
+
+ // firstcond
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4)));
+
+ // firstcond[0] and mask
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ NumOpsAdded = 2;
+ return true;
+ }
+
+ if (Opcode == ARM::tBKPT) {
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // CPS has a singleton $opt operand that contains the following information:
+ // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
+ // but in Thumb it's is 0 as enable and 1 as disable. So map it to ARM's
+ // default one. The second get the AIF flags from Inst{2-0}.
+ if (Opcode == ARM::tCPS) {
+ MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
+ NumOpsAdded = 2;
+ return true;
+ }
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
+ && "Expect >=2 operands");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+
+ if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
+ // Two register instructions.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ } else {
+ // CBNZ, CBZ
+ assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode");
+ MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
+ }
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+// A8.6.53 LDM / LDMIA
+// A8.6.189 STM / STMIA
+//
+// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+ assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
+ Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
+
+ unsigned tRt = getT1tRt(insn);
+ NumOpsAdded = 0;
+
+ // WB register, if necessary.
+ if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ tRt)));
+ ++NumOpsAdded;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ tRt)));
+ ++NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 7, 0);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+
+ return true;
+}
+
+static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+}
+
+static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+}
+
+// A8.6.16 B Encoding T1
+// cond = Inst{11-8} & imm8 = Inst{7-0}
+// imm32 = SignExtend(imm8:'0', 32)
+//
+// tBcc: offset Pred-Imm Pred-CCR
+// tSVC: imm8 Pred-Imm Pred-CCR
+// tTRAP: 0 operand (early return)
+static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ if (Opcode == ARM::tTRAP)
+ return true;
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
+ OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
+ && "Exactly 3 operands expected");
+
+ unsigned Imm8 = getT1Imm8(insn);
+ MI.addOperand(MCOperand::CreateImm(
+ Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1) + 4
+ : (int)Imm8));
+
+ // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier().
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// A8.6.16 B Encoding T2
+// imm11 = Inst{10-0}
+// imm32 = SignExtend(imm11:'0', 32)
+//
+// tB: offset
+static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
+
+ unsigned Imm11 = getT1Imm11(insn);
+
+ // When executing a Thumb instruction, PC reads as the address of the current
+ // instruction plus 4. The assembler subtracts 4 from the difference between
+ // the branch instruction and the target address, disassembler has to add 4 to
+ // to compensate.
+ MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4));
+
+ NumOpsAdded = 1;
+
+ return true;
+
+}
+
+// See A6.2 16-bit Thumb instruction encoding for instruction classes
+// corresponding to op.
+//
+// Table A6-1 16-bit Thumb instruction encoding (abridged)
+// op Instruction or instruction class
+// ------ --------------------------------------------------------------------
+// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000 Data-processing on page A6-8
+// 010001 Special data instructions and branch and exchange on page A6-9
+// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx Load/store single data item on page A6-10
+// 011xxx
+// 100xxx
+// 10100x Generate PC-relative address, see ADR on page A8-32
+// 10101x Generate SP-relative address, see ADD (SP plus immediate) on
+// page A8-28
+// 1011xx Miscellaneous 16-bit instructions on page A6-11
+// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
+// 1101xx Conditional branch, and Supervisor Call on page A6-13
+// 11100x Unconditional Branch, see B on page A8-44
+//
+static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ unsigned op1 = slice(op, 5, 4);
+ unsigned op2 = slice(op, 3, 2);
+ unsigned op3 = slice(op, 1, 0);
+ unsigned opA = slice(op, 5, 2);
+ switch (op1) {
+ case 0:
+ // A6.2.1 Shift (immediate), add, subtract, move, and compare
+ return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 1:
+ switch (op2) {
+ case 0:
+ switch (op3) {
+ case 0:
+ // A6.2.2 Data-processing
+ return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 1:
+ // A6.2.3 Special data instructions and branch and exchange
+ return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ default:
+ // A8.6.59 LDR (literal)
+ return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ break;
+ default:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ break;
+ }
+ break;
+ case 2:
+ switch (op2) {
+ case 0:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ case 1:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 2:
+ if (op3 <= 1) {
+ // A8.6.10 ADR
+ return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ } else {
+ // A8.6.8 ADD (SP plus immediate)
+ return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ default:
+ // A6.2.5 Miscellaneous 16-bit instructions
+ return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ break;
+ case 3:
+ switch (op2) {
+ case 0:
+ if (op3 <= 1) {
+ // A8.6.189 STM / STMIA / STMEA
+ return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else {
+ // A8.6.53 LDM / LDMIA / LDMFD
+ return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ case 1:
+ // A6.2.6 Conditional branch, and Supervisor Call
+ return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 2:
+ // Unconditional Branch, see B on page A8-44
+ return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ default:
+ assert(0 && "Unreachable code");
+ break;
+ }
+ break;
+ default:
+ assert(0 && "Unreachable code");
+ break;
+ }
+
+ return false;
+}
+
+///////////////////////////////////////////////
+// //
+// Thumb2 instruction disassembly functions. //
+// //
+///////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////
+// //
+// Note: the register naming follows the ARM convention! //
+// //
+///////////////////////////////////////////////////////////
+
+static inline bool Thumb2SRSOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2SRSDBW: case ARM::t2SRSDB:
+ case ARM::t2SRSIAW: case ARM::t2SRSIA:
+ return true;
+ }
+}
+
+static inline bool Thumb2RFEOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2RFEDBW: case ARM::t2RFEDB:
+ case ARM::t2RFEIAW: case ARM::t2RFEIA:
+ return true;
+ }
+}
+
+// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0}
+static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ NumOpsAdded = 1;
+ return true;
+}
+
+// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
+static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 1;
+ return true;
+}
+
+static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (Thumb2SRSOpcode(Opcode))
+ return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded);
+
+ if (Thumb2RFEOpcode(Opcode))
+ return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
+ Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
+ Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
+ && "Unexpected opcode");
+ assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5");
+
+ NumOpsAdded = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base.
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = insn & ((1 << 16) - 1);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 16; ++i)
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+
+ return true;
+}
+
+// t2LDREX: Rd Rn
+// t2LDREXD: Rd Rs Rn
+// t2LDREXB, t2LDREXH: Rd Rn
+// t2STREX: Rs Rd Rn
+// t2STREXD: Rm Rd Rs Rn
+// t2STREXB, t2STREXH: Rm Rd Rn
+static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect >=2 operands and first two as reg operands");
+
+ bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
+ bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
+ bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
+
+ // Add the destination operand for store.
+ if (isStore) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ isSW ? decodeRs(insn) : decodeRm(insn))));
+ ++OpIdx;
+ }
+
+ // Source operand for store and destination operand for load.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Thumb2 doubleword complication: with an extra source/destination operand.
+ if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ ++OpIdx;
+ }
+
+ // Finally add the pointer operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// LLVM, as of Jan-05-2010, does not output <Rt2>, i.e., Rs, in the asm.
+// Whereas the ARM Arch. Manual does not require that t2 = t+1 like in ARM ISA.
+//
+// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
+// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
+// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
+//
+// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
+// disassembly only and do not have a tied_to writeback base register operand.
+static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 4
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass == ARM::GPRRegClassID
+ && OpInfo[3].RegClass < 0
+ && "Expect >= 4 operands and first 3 as reg operands");
+
+ // Add the <Rt> <Rt2> operands.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ // Finally add (+/-)imm8*4, depending on the U bit.
+ int Offset = getImm8(insn) * 4;
+ if (getUBit(insn) == 0)
+ Offset = -Offset;
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ NumOpsAdded = 4;
+
+ return true;
+}
+
+// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
+static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "Expect >= 2 operands");
+
+ // The generic version of TBB/TBH needs a base register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ // Add the index register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2MOVCClsl: case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr: case ARM::t2MOVCCror:
+ case ARM::t2LSLri: case ARM::t2LSRri:
+ case ARM::t2ASRri: case ARM::t2RORri:
+ return true;
+ }
+}
+
+// A6.3.11 Data-processing (shifted register)
+//
+// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm
+// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm
+// Three register operands: Rs Rn Rm
+// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm
+//
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms: (Rm, ConstantShiftSpecifier).
+// Constant shift specifier: Imm = (ShOp | ShAmt<<3).
+//
+// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which
+// only require two register operands: Rd, Rm in ARM Reference Manual terms, and
+// nothing else, because the shift amount is already specified.
+// Similar case holds for t2MOVrx, t2ADDrr, ..., etc.
+static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Special case handling.
+ if (Opcode == ARM::t2BR_JT) {
+ assert(NumOps == 4
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass < 0
+ && OpInfo[3].RegClass < 0
+ && "Exactly 4 operands expect and first two as reg operands");
+ // Only need to populate the src reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(0));
+ MI.addOperand(MCOperand::CreateImm(0));
+ MI.addOperand(MCOperand::CreateImm(0));
+ NumOpsAdded = 4;
+ return true;
+ }
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first two as reg operands");
+
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
+ bool NoDstReg = (decodeRs(insn) == 0xF);
+
+ // Build the register operands, followed by the constant shift specifier.
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[0].RegClass,
+ NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ int Idx;
+ if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ // Process tied_to operand constraint.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ } else if (!NoDstReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
+ decodeRn(insn))));
+ ++OpIdx;
+ } else {
+ DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
+ return false;
+ }
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (NumOps == OpIdx)
+ return true;
+
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()) {
+
+ if (Thumb2ShiftOpcode(Opcode))
+ MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn)));
+ else {
+ // Build the constant shift specifier operand.
+ unsigned bits2 = getShiftTypeBits(insn);
+ unsigned imm5 = getShiftAmtBits(insn);
+ ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
+ unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ }
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.1 Data-processing (modified immediate)
+//
+// Two register operands: Rs Rn ModImm
+// One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm
+// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm -
+// {t2MOVi, t2MVNi}
+//
+// ModImm = ThumbExpandImm(i:imm3:imm8)
+static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first one as reg operand");
+
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
+ bool NoDstReg = (decodeRs(insn) == 0xF);
+
+ // Build the register operands, followed by the modified immediate.
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RdRegClassID,
+ NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+ ++OpIdx;
+
+ if (TwoReg) {
+ if (NoDstReg) {
+ DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
+ return false;
+ }
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // The modified immediate operand should come next.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ // i:imm3:imm8
+ // A6.3.2 Modified immediate constants in Thumb instructions
+ unsigned imm12 = getIImm3Imm8(insn);
+ MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12)));
+ ++OpIdx;
+
+ return true;
+}
+
+static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
+// A6.3.3 Data-processing (plain binary immediate)
+//
+// o t2ADDri12, t2SUBri12: Rs Rn imm12
+// o t2LEApcrel (ADR): Rs imm12
+// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
+// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010)
+// o t2MOVi16: Rs imm16
+// o t2MOVTi16: Rs imm16
+// o t2SBFX (SBFX): Rs Rn lsb width
+// o t2UBFX (UBFX): Rs Rn lsb width
+// o t2BFI (BFI): Rs Rn lsb width
+static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first one as reg operand");
+
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
+
+ // Build the register operand(s), followed by the immediate(s).
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
+ decodeRs(insn))));
+ ++OpIdx;
+
+ if (TwoReg) {
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+ int Idx;
+ if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ // Process tied_to operand constraint.
+ MI.addOperand(MI.getOperand(Idx));
+ } else {
+ // Add src reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ }
+ ++OpIdx;
+ }
+
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ // Pre-increment OpIdx.
+ ++OpIdx;
+
+ if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
+ || Opcode == ARM::t2LEApcrel)
+ MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
+ else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
+ MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
+ else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
+ uint32_t mask = 0;
+ if (getBitfieldInvMask(insn, mask))
+ MI.addOperand(MCOperand::CreateImm(mask));
+ else
+ return false;
+ } else {
+ // Handle the case of: lsb width
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
+ MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.4 Table A6-15 Miscellaneous control instructions
+// A8.6.41 DMB
+// A8.6.42 DSB
+// A8.6.49 ISB
+static inline bool t2MiscCtrlInstr(uint32_t insn) {
+ if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 &&
+ slice(insn, 12, 12) == 0)
+ return true;
+
+ return false;
+}
+
+// A6.3.4 Branches and miscellaneous control
+//
+// A8.6.16 B
+// Branches: t2B, t2Bcc -> imm operand
+//
+// Branches: t2TPsoft -> no operand
+//
+// A8.6.23 BL, BLX (immediate)
+// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand
+//
+// A8.6.26
+// t2BXJ -> Rn
+//
+// Miscellaneous control: t2DMBsy (and its t2DMB variants),
+// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX
+// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
+//
+// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
+// -> no operand (except pred-imm pred-ccr)
+//
+// t2DBG -> imm4 = Inst{3-0}
+//
+// t2MRS/t2MRSsys -> Rs
+// t2MSR/t2MSRsys -> Rn mask=Inst{11-8}
+// t2SMC -> imm4 = Inst{19-16}
+static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (NumOps == 0)
+ return true;
+
+ if (t2MiscCtrlInstr(insn))
+ return true;
+
+ switch (Opcode) {
+ case ARM::t2CLREX:
+ case ARM::t2NOP:
+ case ARM::t2YIELD:
+ case ARM::t2WFE:
+ case ARM::t2WFI:
+ case ARM::t2SEV:
+ return true;
+ default:
+ break;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::t2CPS3p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS2p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS1p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // DBG has its option specified in Inst{3-0}.
+ if (Opcode == ARM::t2DBG) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // MRS and MRSsys take one GPR reg Rs.
+ if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // BXJ takes one GPR reg Rn.
+ if (Opcode == ARM::t2BXJ) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // MSR take a mask, followed by one GPR reg Rn. The mask contains the R Bit in
+ // bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::t2MSR) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
+ slice(insn, 11, 8) /* Special Reg */));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 2;
+ return true;
+ }
+ // SMC take imm4.
+ if (Opcode == ARM::t2SMC) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // Add the imm operand.
+ int Offset = 0;
+
+ switch (Opcode) {
+ default:
+ assert(0 && "Unexpected opcode");
+ return false;
+ case ARM::t2B:
+ Offset = decodeImm32_B_EncodingT4(insn);
+ break;
+ case ARM::t2Bcc:
+ Offset = decodeImm32_B_EncodingT3(insn);
+ break;
+ case ARM::tBLr9:
+ Offset = decodeImm32_BL(insn);
+ break;
+ case ARM::tBLXi_r9:
+ Offset = decodeImm32_BLX(insn);
+ break;
+ }
+ // When executing a Thumb instruction, PC reads as the address of the current
+ // instruction plus 4. The assembler subtracts 4 from the difference between
+ // the branch instruction and the target address, disassembler has to add 4 to
+ // to compensate.
+ MI.addOperand(MCOperand::CreateImm(Offset + 4));
+
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2PLDi12: case ARM::t2PLDi8:
+ case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIi12: case ARM::t2PLIi8:
+ case ARM::t2PLIs:
+ return true;
+ }
+}
+
+static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
+ // t2PLDr: Rn Rm
+ // t2PLDs: Rn Rm imm2=Inst{5-4}
+ // Same pattern applies for t2PLDW* and t2PLI*.
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 2 operands and first one as reg operand");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ } else {
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+ int Offset = 0;
+ if (slice(insn, 19, 16) == 0xFF) {
+ bool Negative = slice(insn, 23, 23) == 0;
+ unsigned Imm12 = getImm12(insn);
+ Offset = Negative ? -1 - Imm12 : 1 * Imm12;
+ } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
+ Opcode == ARM::t2PLIi8) {
+ // A8.6.117 Encoding T2: add = FALSE
+ unsigned Imm8 = getImm8(insn);
+ Offset = -1 - Imm8;
+ } else // The i12 forms. See, for example, A8.6.117 Encoding T1.
+ Offset = decodeImm12(insn);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.10 Store single data item
+// A6.3.9 Load byte, memory hints
+// A6.3.8 Load halfword, memory hints
+// A6.3.7 Load word
+//
+// For example,
+//
+// t2LDRi12: Rd Rn (+)imm12
+// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
+// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+//
+// t2STRi12: Rd Rn (+)imm12
+// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
+// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+//
+// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated
+// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first
+// operand is Rn; for all the other instructions, Rd is the first operand.
+//
+// Delegates to DisassembleThumb2PreLoad() for preload data/instruction.
+// Delegates to DisassembleThumb2Ldpci() for load * literal operations.
+static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ unsigned Rn = decodeRn(insn);
+
+ if (Thumb2PreloadOpcode(Opcode))
+ return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ // See, for example, A6.3.7 Load word: Table A6-18 Load word.
+ if (Load && Rn == 15)
+ return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 3 operands and first two as reg operands");
+
+ bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID);
+ bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1;
+ bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
+
+ // Build the register operands, followed by the immediate.
+ unsigned R0, R1, R2 = 0;
+ unsigned Rd = decodeRd(insn);
+ int Imm = 0;
+
+ if (!Load && TIED_TO) {
+ R0 = Rn;
+ R1 = Rd;
+ } else {
+ R0 = Rd;
+ R1 = Rn;
+ }
+ if (ThreeReg) {
+ if (TIED_TO) {
+ R2 = Rn;
+ Imm = decodeImm8(insn);
+ } else {
+ R2 = decodeRm(insn);
+ // See, for example, A8.6.64 LDRB (register).
+ // And ARMAsmPrinter::printT2AddrModeSoRegOperand().
+ // LSL is the default shift opc, and LLVM does not expect it to be encoded
+ // as part of the immediate operand.
+ // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4));
+ Imm = slice(insn, 5, 4);
+ }
+ } else {
+ if (Imm12)
+ Imm = getImm12(insn);
+ else
+ Imm = decodeImm8(insn);
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ R0)));
+ ++OpIdx;
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ R1)));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ R2)));
+ ++OpIdx;
+ }
+
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+
+ return true;
+}
+
+// A6.3.12 Data-processing (register)
+//
+// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))]
+// Three register operands only: Rs Rn Rm
+// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))]
+//
+// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm
+//
+// Miscellaneous operations: Rs [Rn] Rm
+static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 2 operands and first two as reg operands");
+
+ // Build the register operands, followed by the optional rotation amount.
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Add the rotation amount immediate.
+ MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.16 Multiply, multiply accumulate, and absolute difference
+//
+// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12}
+// t2MUL, t2SMMUL: Rs Rn Rm
+// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12}
+// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm
+//
+// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]:
+// Rs Rn Rm Ra=Inst{15-12}
+//
+// Unsigned Sum of Absolute Differences [and Accumulate]
+// Rs Rn Rm [Ra=Inst{15-12}]
+static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 3 operands and first three as reg operands");
+
+ // Build the register operands.
+
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+
+ if (FourReg)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRd(insn))));
+
+ NumOpsAdded = FourReg ? 4 : 3;
+
+ return true;
+}
+
+// A6.3.17 Long multiply, long multiply accumulate, and divide
+//
+// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Halfword multiple accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Dual halfword multiple: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
+static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 3 operands and first three as reg operands");
+
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
+
+ // Build the register operands.
+
+ if (FourReg)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+
+ if (FourReg)
+ NumOpsAdded = 4;
+ else
+ NumOpsAdded = 3;
+
+ return true;
+}
+
+// See A6.3 32-bit Thumb instruction encoding for instruction classes
+// corresponding to (op1, op2, op).
+//
+// Table A6-9 32-bit Thumb instruction encoding
+// op1 op2 op Instruction class, see
+// --- ------- -- -----------------------------------------------------------
+// 01 00xx0xx - Load/store multiple on page A6-23
+// 00xx1xx - Load/store dual, load/store exclusive, table branch on
+// page A6-24
+// 01xxxxx - Data-processing (shifted register) on page A6-31
+// 1xxxxxx - Coprocessor instructions on page A6-40
+// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
+// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
+// - 1 Branches and miscellaneous control on page A6-20
+// 11 000xxx0 - Store single data item on page A6-30
+// 001xxx0 - Advanced SIMD element or structure load/store instructions
+// on page A7-27
+// 00xx001 - Load byte, memory hints on page A6-28
+// 00xx011 - Load halfword, memory hints on page A6-26
+// 00xx101 - Load word on page A6-25
+// 00xx111 - UNDEFINED
+// 010xxxx - Data-processing (register) on page A6-33
+// 0110xxx - Multiply, multiply accumulate, and absolute difference on
+// page A6-38
+// 0111xxx - Long multiply, long multiply accumulate, and divide on
+// page A6-39
+// 1xxxxxx - Coprocessor instructions on page A6-40
+//
+static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
+ MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+
+ switch (op1) {
+ case 1:
+ if (slice(op2, 6, 5) == 0) {
+ if (slice(op2, 2, 2) == 0) {
+ // Load/store multiple.
+ return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+
+ // Load/store dual, load/store exclusive, table branch, otherwise.
+ assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
+ if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
+ (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
+ // Load/store exclusive.
+ return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ if (Opcode == ARM::t2LDRDi8 ||
+ Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST ||
+ Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) {
+ // Load/store dual.
+ return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
+ // Table branch.
+ return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ } else if (slice(op2, 6, 5) == 1) {
+ // Data-processing (shifted register).
+ return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+
+ // FIXME: A6.3.18 Coprocessor instructions
+ // But see ThumbDisassembler::getInstruction().
+
+ break;
+ case 2:
+ if (op == 0) {
+ if (slice(op2, 5, 5) == 0)
+ // Data-processing (modified immediate)
+ return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
+
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ case 3:
+ switch (slice(op2, 6, 5)) {
+ case 0:
+ // Load/store instructions...
+ if (slice(op2, 0, 0) == 0) {
+ if (slice(op2, 4, 4) == 0) {
+ // Store single data item on page A6-30
+ return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
+ B);
+ } else {
+ // FIXME: Advanced SIMD element or structure load/store instructions.
+ // But see ThumbDisassembler::getInstruction().
+ ;
+ }
+ } else {
+ // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
+ return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
+ NumOpsAdded, B);
+ }
+ break;
+ case 1:
+ if (slice(op2, 4, 4) == 0) {
+ // A6.3.12 Data-processing (register)
+ return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else if (slice(op2, 3, 3) == 0) {
+ // A6.3.16 Multiply, multiply accumulate, and absolute difference
+ return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else {
+ // A6.3.17 Long multiply, long multiply accumulate, and divide
+ return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ break;
+ default:
+ // FIXME: A6.3.18 Coprocessor instructions
+ // But see ThumbDisassembler::getInstruction().
+ ;
+ break;
+ }
+
+ break;
+ default:
+ assert(0 && "Thumb2 encoding error!");
+ break;
+ }
+
+ return false;
+}
+
+static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+
+ uint16_t HalfWord = slice(insn, 31, 16);
+
+ if (HalfWord == 0) {
+ // A6.2 16-bit Thumb instruction encoding
+ // op = bits[15:10]
+ uint16_t op = slice(insn, 15, 10);
+ return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
+ }
+
+ unsigned bits15_11 = slice(HalfWord, 15, 11);
+
+ // A6.1 Thumb instruction set encoding
+ if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
+ assert("Bits[15:11] first halfword of Thumb2 instruction is out of range");
+ return false;
+ }
+
+ // A6.3 32-bit Thumb instruction encoding
+
+ uint16_t op1 = slice(HalfWord, 12, 11);
+ uint16_t op2 = slice(HalfWord, 10, 4);
+ uint16_t op = slice(insn, 15, 15);
+
+ return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 0000000..1499da0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,711 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMBaseInfo.h"
+#include "ARMInstPrinter.h"
+#include "ARMAddressingModes.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "ARMGenAsmWriter.inc"
+
+StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ unsigned Opcode = MI->getOpcode();
+
+ // Check for MOVs and print canonical forms, instead.
+ if (Opcode == ARM::MOVs) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6, O);
+ printPredicateOperand(MI, 4, O);
+
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
+ return;
+
+ O << ", ";
+
+ if (MO2.getReg()) {
+ O << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+ return;
+ }
+
+ // A8.6.123 PUSH
+ if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.122 POP
+ if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.355 VPUSH
+ if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.354 VPOP
+ if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ printInstruction(MI, O);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
+ const MCAsmInfo *MAI) {
+ // Break it up into two parts that make up a shifter immediate.
+ V = ARM_AM::getSOImmVal(V);
+ assert(V != -1 && "Not a valid so_imm value!");
+
+ unsigned Imm = ARM_AM::getSOImmValImm(V);
+ unsigned Rot = ARM_AM::getSOImmValRot(V);
+
+ // Print low-level immediate formation info, per
+ // A5.1.3: "Data-processing operands - Immediate".
+ if (Rot) {
+ O << "#" << Imm << ", " << Rot;
+ // Pretty printed version.
+ if (CommentStream)
+ *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
+ } else {
+ O << "#" << Imm;
+ }
+}
+
+
+/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
+/// immediate in bits 0-7.
+void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImm() && "Not a valid so_imm value!");
+ printSOImm(O, MO.getImm(), CommentStream, &MAI);
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (MO2.getReg()) {
+ O << ' ' << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+ << " #" << ShImm;
+}
+
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << '[' << getRegisterName(MO1.getReg());
+
+ if (MO2.getReg()) {
+ O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg()) << ']';
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+ O << ']';
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << getRegisterName(MO1.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+ .getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ImmOffs * 4;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << (MO2.getImm() << 3);
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else
+ O << ", " << getRegisterName(MO.getReg());
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint32_t v = ~MO.getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
+ assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+ O << '#' << lsb << ", #" << width;
+}
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum) O << ", ";
+ O << getRegisterName(MI->getOperand(i).getReg());
+ }
+ O << "}";
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+
+ if (SpecRegRBit)
+ O << "spsr";
+ else
+ O << "cpsr";
+
+ if (Mask) {
+ O << '_';
+ if (Mask & 8) O << 'f';
+ if (Mask & 4) O << 's';
+ if (Mask & 2) O << 'x';
+ if (Mask & 1) O << 'c';
+ }
+}
+
+void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << '#';
+ if (Op.getImm() < 0)
+ O << '-' << (-Op.getImm() - 1);
+ else
+ O << Op.getImm();
+}
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNum).getReg()) {
+ assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+ "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned RegNum = MO2.getReg())
+ O << ", " << getRegisterName(RegNum);
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ O << getRegisterName(Reg);
+
+ // Print the shift opc.
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << ", #" << OffImm * 4;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", " << getRegisterName(MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl #" << ShAmt;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << (float)MO.getFPImm();
+ } else {
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.F;
+ }
+}
+
+void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << MO.getFPImm();
+ } else {
+ // We expect the binary encoding of a floating point number here.
+ union {
+ uint64_t I;
+ double D;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.D;
+ }
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 0000000..679d313
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,111 @@
+//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTPRINTER_H
+#define ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+ ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..18645c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmPrinter
+ ARMInstPrinter.cpp
+ )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
new file mode 100644
index 0000000..65d372e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 0000000..f9e86eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,321 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiple and add / sub instructions) when special VMLx hazards are detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI) const;
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ // FIXME: Detect integer instructions properly.
+ unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainVFP) {
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+ Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ } else if (Domain == ARMII::DomainNEON) {
+ if (TID.mayStore() || TID.mayLoad())
+ return false;
+ } else {
+ return false;
+ }
+
+ return MI->readsRegister(Reg, TRI);
+ return false;
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+ if (ForceExapnd)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode()))
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ return true;
+
+ // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
+ // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+ // preserves the in-order retirement of the instructions.
+ // Look at the next few instructions, if *most* of them can cause hazards,
+ // then the scheduler can't *fix* this, we'd better break up the VMLA.
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode()))
+ return true;
+
+ // Look for VMLx RAW hazard.
+ if (hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const TargetInstrDesc &TID1 = TII->get(MulOpc);
+ const TargetInstrDesc &TID2 = TII->get(AddSubOpc);
+ unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI));
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = llvm::prior(MII);
+ MachineInstr &MI2 = *MII;
+ MII = llvm::prior(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+ // Assume dual issues of non-VFP / NEON instructions.
+ pushStack(0);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(TID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= ExpandFPMLxInstructions(MBB);
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
new file mode 100644
index 0000000..97e54bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
@@ -0,0 +1,141 @@
+//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "neon-mov-fix"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumVMovs, "Number of reg-reg moves converted");
+
+namespace {
+ struct NEONMoveFixPass : public MachineFunctionPass {
+ static char ID;
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NEON reg-reg move conversion";
+ }
+
+ private:
+ const TargetRegisterInfo *TRI;
+ const ARMBaseInstrInfo *TII;
+
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ bool InsertMoves(MachineBasicBlock &MBB);
+ };
+ char NEONMoveFixPass::ID = 0;
+}
+
+bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
+ RegMap Defs;
+ bool Modified = false;
+
+ // Walk over MBB tracking the def points of the registers.
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+ MachineInstr *MI = &*MII;
+
+ if (MI->getOpcode() == ARM::VMOVD &&
+ !TII->isPredicated(MI)) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ // If we do not find an instruction defining the reg, this means the
+ // register should be live-in for this BB. It's always to better to use
+ // NEON reg-reg moves.
+ unsigned Domain = ARMII::DomainNEON;
+ RegMap::iterator DefMI = Defs.find(SrcReg);
+ if (DefMI != Defs.end()) {
+ Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
+ // Instructions in general domain are subreg accesses.
+ // Map them to NEON reg-reg moves.
+ if (Domain == ARMII::DomainGeneral)
+ Domain = ARMII::DomainNEON;
+ }
+
+ if (Domain & ARMII::DomainNEON) {
+ // Convert VMOVD to VMOVDneon
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ DEBUG({errs() << "vmov convert: "; MI->dump();});
+
+ // It's safe to ignore imp-defs / imp-uses here, since:
+ // - We're running late, no intelligent condegen passes should be run
+ // afterwards
+ // - The imp-defs / imp-uses are superregs only, we don't care about
+ // them.
+ AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg));
+ MBB.erase(MI);
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+
+ DEBUG({errs() << " into: "; MI->dump();});
+
+ Modified = true;
+ ++NumVMovs;
+ } else {
+ assert((Domain & ARMII::DomainVFP) && "Invalid domain!");
+ // Do nothing.
+ }
+ }
+
+ // Update def information.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+
+ Defs[MOReg] = MI;
+ // Catch aliases as well.
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
+ Defs[*R] = MI;
+ }
+ }
+
+ return Modified;
+}
+
+bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
+ ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
+ const TargetMachine &TM = Fn.getTarget();
+
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+
+ TRI = TM.getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= InsertMoves(MBB);
+ }
+
+ return Modified;
+}
+
+/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
+/// pass.
+FunctionPass *llvm::createNEONMoveFixPass() {
+ return new NEONMoveFixPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 0000000..163a0a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMTarget, llvm::TheThumbTarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMTarget, "arm", "ARM");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ Y(TheThumbTarget, "thumb", "Thumb");
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 0000000..233e165
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,352 @@
+//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes) {
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, dl);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ if (NumBytes > 7)
+ // If offset is > 7 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ if (MI->getOpcode() == ARM::tRestore &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert((MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo, dl);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4);
+ } else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (VARegSaveSize) {
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. Therefore, we
+ // pop the old LR into R3 as a temporary.
+
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NumRegs = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NumRegs = true;
+ }
+
+ // It's illegal to emit pop instruction without operands.
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 0000000..c592e12
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __THUMB_FRAMEINFO_H_
+#define __THUMM_FRAMEINFO_H_
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 0000000..3fbb433
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,111 @@
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1InstrInfo.h"
+#include "ARM.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "Thumb1InstrInfo.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ return 0;
+}
+
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 0000000..17ef2f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,59 @@
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1INSTRUCTIONINFO_H
+#define THUMB1INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+ Thumb1RegisterInfo RI;
+public:
+ explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+};
+}
+
+#endif // THUMB1INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
new file mode 100644
index 0000000..f62a13e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -0,0 +1,695 @@
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
+
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
+}
+
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ DebugLoc dl) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have high register version. Load the negative value
+ // if either base or dest register is a high register. Also, if do not
+ // issue sub as part of the sequence if condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+ bool NeedCC = false;
+ bool NeedPred = false;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ if (DestReg == ARM::SP) {
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ } else {
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NumBits = 8;
+ NeedPred = NeedCC = true;
+ }
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+ MRI, dl);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MachineInstrBuilder MIB =
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg));
+ AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB .addReg(DestReg).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ }
+ else {
+ bool isKill = BaseReg != ARM::SP;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ BaseReg = DestReg;
+
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NeedPred = NeedCC = isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc) {
+ const TargetInstrDesc &TID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3));
+ }
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes) {
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, dl);
+}
+
+void Thumb1RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const Thumb1RegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
+ DestReg))
+ .addImm(ThisVal));
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+ if (isSub) {
+ const TargetInstrDesc &TID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+ .addReg(DestReg, RegState::Kill));
+ }
+}
+
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ case ARM::tRestore: // FIXME: Should this opcode be here?
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ case ARM::tSpill: // FIXME: Should this opcode be here?
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+ if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ MI.setDesc(TII.get(Opcode));
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1 &&
+ (!MI.getOperand(FrameRegIdx+1).isReg() ||
+ !MI.getOperand(FrameRegIdx+1).isImm()));
+ return true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
+ .addImm(Offset / Scale));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
+ }
+ return true;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+ *this, dl);
+ MBB.erase(II);
+ return true;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
+ }
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = llvm::next(II);
+ emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+ *this, dl);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructons)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
+ if (Opcode == ARM::tADDi3) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+ }
+ return true;
+ } else {
+ if (AddrMode != ARMII::AddrModeT1_s)
+ llvm_unreachable("Unsupported addressing mode!");
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with the frame register (e.g., sp).
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we're using a register where sp was stored, convert the instruction
+ // to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
+ return true;
+ }
+
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) {
+ ImmOp.ChangeToImmediate(0);
+ } else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask * Scale);
+ }
+ }
+
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function, the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
+ addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
+ if (Desc.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tRestore) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
+ UseRR = true;
+ }
+ } else {
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+ *this, dl);
+ }
+
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else if (Desc.mayStore()) {
+ VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSpill) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
+ *this, dl);
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
+ MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else {
+ assert(false && "Unexpected opcode!");
+ }
+
+ // Add predicate back if it's needed.
+ if (MI.getDesc().isPredicable()) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
new file mode 100644
index 0000000..8a87cc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -0,0 +1,63 @@
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1REGISTERINFO_H
+#define THUMB1REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) const;
+
+ /// Code Generation virtual methods...
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+};
+}
+
+#endif // THUMB1REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 0000000..45e6937
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,255 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "thumb2-it"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+
+namespace {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+/// TrackDefUses - Tracking what registers are being defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ if (!isCopy(MI))
+ return false;
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ return false;
+}
+
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
+ ++MBBI;
+
+ // Form IT block.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
+ }
+
+ // Finalize IT mask.
+ Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
+ MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
+ MachineBasicBlock &MBB = *MFI;
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
+ }
+
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
new file mode 100644
index 0000000..2f67257
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -0,0 +1,616 @@
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2InstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMAddressingModes.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
+
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
+ return 0;
+}
+
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Ctrl flow can reach here if branch folding is run before IT block
+ // formation pass.
+ }
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Handle SPR, DPR, and QPR copies.
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void Thumb2InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
+}
+
+void Thumb2InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
+}
+
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ }
+ return;
+ }
+ }
+
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ bool HasCCOut = true;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ // FIXME: Fix Thumb1 immediate encoding.
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4);
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
+
+ // Build the new ADD / SUB.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal));
+ if (HasCCOut)
+ AddDefaultCC(MIB);
+
+ BaseReg = DestReg;
+ }
+}
+
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVgpr2gpr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1 &&
+ (!MI.getOperand(FrameRegIdx+1).isReg() ||
+ !MI.getOperand(FrameRegIdx+1).isImm()));
+ return true;
+ }
+
+ bool isSP = FrameReg == ARM::SP;
+ bool HasCCOut = Opcode != ARM::t2ADDri12;
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096 &&
+ (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
+ unsigned NewOpc = isSP
+ ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
+ : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12);
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Remove the cc_out operand.
+ if (HasCCOut)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ } else {
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign convert Opcode to the appropriate
+ // instruction
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else if (AddrMode == ARMII::AddrMode5) {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ } else {
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, offset doesn't fit. Pull in what we can to simplify
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-addrss instruction inserted by two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+ SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI which can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 0000000..f2637d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,78 @@
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2INSTRUCTIONINFO_H
+#define THUMB2INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+
+namespace llvm {
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+ Thumb2RegisterInfo RI;
+public:
+ explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-addrss instruction inserted by two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
+}
+
+#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
new file mode 100644
index 0000000..099b8f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,61 @@
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
new file mode 100644
index 0000000..b3cf2e5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -0,0 +1,42 @@
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2REGISTERINFO_H
+#define THUMB2REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) const;
+};
+}
+
+#endif // THUMB2REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 0000000..cc8f61c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,770 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "t2-reduce-size"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
+
+namespace {
+ /// ReduceTable - A static table with information on mapping from wide
+ /// opcodes to narrow
+ struct ReduceEntry {
+ unsigned WideOpc; // Wide opcode
+ unsigned NarrowOpc1; // Narrow opcode to transform to
+ unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+ unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
+ // 1 - No cc field.
+ // 2 - Always set CPSR.
+ unsigned PredCC2 : 2;
+ unsigned Special : 1; // Needs to be dealt with specially
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 },
+ // Note: immediate scale is 4.
+ { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 1 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 1 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
+ { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
+ { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 },
+ { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 },
+ { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 1 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 1 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
+ };
+
+ class Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce();
+
+ const Thumb2InstrInfo *TII;
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ }
+}
+
+static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
+ for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+ /// Old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
+ Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
+ Opc == ARM::t2LDMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool HasOffReg = true;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
+
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ HasOffReg = false;
+ }
+
+ Scale = 4;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB: {
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
+ return false;
+
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+
+ if (!isOK)
+ return false;
+
+ OpNum = 0;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_RET: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg != ARM::SP)
+ return false;
+ Opc = Entry.NarrowOpc2; // tPOP_RET
+ OpNum = 2;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ OpNum = 0;
+
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP &&
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
+ Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
+ return false;
+ }
+
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ }
+
+ // Transfer the rest of operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR) {
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayLoad() || TID.mayStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ }
+ case ARM::t2ADDrSPi: {
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 };
+ if (MI->getOperand(0).getReg() == ARM::SP)
+ return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ }
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewTID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewTID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.hasOptionalDef()) {
+ unsigned NumOps = TID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewTID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = TID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && TID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ unsigned Limit = ~0U;
+ unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
+ if (Entry.Imm1Limit)
+ Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !TID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewTID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewTID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (TID.hasOptionalDef()) {
+ unsigned NumOps = TID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewTID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = TID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((TID.getOpcode() == ARM::t2RSBSri ||
+ TID.getOpcode() == ARM::t2RSBri) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (Scale > 1 && !isPred && MO.isImm())
+ MIB.addImm(MO.getImm() / Scale);
+ else {
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ }
+ if (!TID.isPredicable() && NewTID.isPredicable())
+ AddDefaultPred(MIB);
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+}
+
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
+ bool HasDef = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Yes, CPSR could be livein.
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+
+ MachineInstr *MI = &*MII;
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI != ReduceOpcodeMap.end()) {
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+ // Ignore "special" cases for now.
+ if (Entry.Special) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ }
+
+ ProcessNext:
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
+ }
+
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+
+ bool Modified = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Modified |= ReduceMBB(*I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() {
+ return new Thumb2SizeReduce();
+}
OpenPOWER on IntegriCloud